16 files changed, 901 insertions, 559 deletions
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
index 8388d6c97adfe..200e24f428ef3 100644
--- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
+++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
@@ -3,9 +3,9 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK: Function: foo
-; CHECK:   PartialAlias: i32* %Ipointer, i32* %Jpointer
+; CHECK:   MayAlias: i32* %Ipointer, i32* %Jpointer
 ; CHECK: 9 no alias responses
-; CHECK: 6 partial alias responses
+; CHECK: 6 may alias responses
 
 define void @foo(i32* noalias %p, i32* noalias %q, i32 %i, i32 %j) {
   %Ipointer = getelementptr i32, i32* %p, i32 %i
diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
index b2e7a60047bd3..79421824e6ea2 100644
--- a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
+++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:32:32:32"
 
-; CHECK: 1 partial alias response
+; CHECK: 1 may alias responses
 
 define i32 @test(i32* %tab, i32 %indvar) nounwind {
   %tmp31 = mul i32 %indvar, -2
diff --git a/test/Analysis/BasicAA/bug.23540.ll b/test/Analysis/BasicAA/bug.23540.ll
index f693bcf73cd63..6a00abdce3a10 100644
--- a/test/Analysis/BasicAA/bug.23540.ll
+++ b/test/Analysis/BasicAA/bug.23540.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @c = external global i32
 
 ; CHECK-LABEL: f
-; CHECK: PartialAlias: i32* %arrayidx, i32* %arrayidx6
+; CHECK: MayAlias: i32* %arrayidx, i32* %arrayidx6
 define void @f() {
   %idxprom = zext i32 undef to i64
   %add4 = add i32 0, 1
diff --git a/test/Analysis/BasicAA/bug.23626.ll b/test/Analysis/BasicAA/bug.23626.ll
index 6a1478c65cefd..7d5b5ad06698d 100644
--- a/test/Analysis/BasicAA/bug.23626.ll
+++ b/test/Analysis/BasicAA/bug.23626.ll
@@ -3,12 +3,12 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-darwin13.4.0"
 
 ; CHECK-LABEL: compute1
-; CHECK: PartialAlias:	i32* %arrayidx8, i32* %out
-; CHECK: PartialAlias:	i32* %arrayidx11, i32* %out
-; CHECK: PartialAlias:	i32* %arrayidx11, i32* %arrayidx8
-; CHECK: PartialAlias:	i32* %arrayidx14, i32* %out
-; CHECK: PartialAlias:	i32* %arrayidx14, i32* %arrayidx8
-; CHECK: PartialAlias:	i32* %arrayidx11, i32* %arrayidx14
+; CHECK: MayAlias:	i32* %arrayidx8, i32* %out
+; CHECK: MayAlias:	i32* %arrayidx11, i32* %out
+; CHECK: MayAlias:	i32* %arrayidx11, i32* %arrayidx8
+; CHECK: MayAlias:	i32* %arrayidx14, i32* %out
+; CHECK: MayAlias:	i32* %arrayidx14, i32* %arrayidx8
+; CHECK: MayAlias:	i32* %arrayidx11, i32* %arrayidx14
 define void @compute1(i32 %num.0.lcssa, i32* %out) {
   %idxprom = zext i32 %num.0.lcssa to i64
   %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %idxprom
@@ -22,7 +22,7 @@ define void @compute1(i32 %num.0.lcssa, i32* %out) {
 }
 
 ; CHECK-LABEL: compute2
-; CHECK: PartialAlias: i32* %arrayidx11, i32* %out.addr
+; CHECK: MayAlias: i32* %arrayidx11, i32* %out.addr
 define void @compute2(i32 %num, i32* %out.addr) {
   %add9 = add i32 %num, 1
   %idxprom10 = zext i32 %add9 to i64
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index f5e2c7c13617c..f77156305c260 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; CHECK: PartialAlias: double* %p.0.i.0, double* %p3
+; CHECK: MayAlias: double* %p.0.i.0, double* %p3
 
 ; %p3 is equal to %p.0.i.0 on the second iteration of the loop,
 ; so MayAlias is needed.  In practice, basicaa returns PartialAlias
diff --git a/test/Analysis/BasicAA/fallback-mayalias.ll b/test/Analysis/BasicAA/fallback-mayalias.ll
new file mode 100644
index 0000000000000..a1e4b12d20ade
--- /dev/null
+++ b/test/Analysis/BasicAA/fallback-mayalias.ll
@@ -0,0 +1,23 @@
+; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+
+; Check that BasicAA falls back to MayAlias (instead of PartialAlias) when none
+; of its little tricks are applicable.
+
+; CHECK: MayAlias: float* %arrayidxA, float* %arrayidxB
+
+define void @fallback_mayalias(float* noalias nocapture %C, i64 %i, i64 %j) local_unnamed_addr {
+entry:
+  %shl = shl i64 %i, 3
+  %mul = shl nsw i64 %j, 4
+  %addA = add nsw i64 %mul, %shl
+  %orB = or i64 %shl, 1
+  %addB = add nsw i64 %mul, %orB
+
+  %arrayidxA = getelementptr inbounds float, float* %C, i64 %addA
+  store float undef, float* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds float, float* %C, i64 %addB
+  store float undef, float* %arrayidxB, align 4
+
+  ret void
+}
diff --git a/test/Analysis/BasicAA/q.bad.ll b/test/Analysis/BasicAA/q.bad.ll
index f2de6a76c5e01..2c7bc1d8591ee 100644
--- a/test/Analysis/BasicAA/q.bad.ll
+++ b/test/Analysis/BasicAA/q.bad.ll
@@ -15,7 +15,7 @@ define void @test_zext_sext_amounts255(i8* %mem) {
 }
 
 ; CHECK-LABEL: test_zext_sext_amounts
-; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MayAlias: i8* %a, i8* %b
 ; %a and %b only PartialAlias as, although they're both zext(sext(%num)) they'll extend the sign by a different
 ; number of bits before zext-ing the remainder.
 define void @test_zext_sext_amounts(i8* %mem, i8 %num) {
@@ -44,9 +44,9 @@ define void @based_on_pr18068(i32 %loaded, i8* %mem) {
 }
 
 ; CHECK-LABEL: test_path_dependence
-; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MayAlias: i8* %a, i8* %b
 ; CHECK: MustAlias: i8* %a, i8* %c
-; CHECK: PartialAlias: i8* %a, i8* %d
+; CHECK: MayAlias: i8* %a, i8* %d
 define void @test_path_dependence(i32 %p, i8* %mem) {
   %p.minus1 = add i32 %p, -1 ; this will always unsigned-wrap, unless %p == 0
   %p.minus1.64 = zext i32 %p.minus1 to i64
@@ -83,7 +83,7 @@ define void @test_zext_sext_255(i8* %mem) {
 }
 
 ; CHECK-LABEL: test_zext_sext_num
-; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MayAlias: i8* %a, i8* %b
 ; %a and %b NoAlias if %num == 255 (see @test_zext_sext_255), but %a and %b NoAlias for other values of %num (e.g. 0)
 define void @test_zext_sext_num(i8* %mem, i8 %num) {
   %zext.num = zext i8 %num to i16
@@ -142,9 +142,9 @@ define void @constantOffsetHeuristic_i8_i32(i32* %mem, i8 %val) {
 }
 
 ; CHECK-LABEL: constantOffsetHeuristic_i3_i8
-; CHECK: PartialAlias:  i32* %a, i32* %b
+; CHECK: MayAlias:  i32* %a, i32* %b
 ; CHECK: NoAlias:  i32* %a, i32* %c
-; CHECK: PartialAlias:  i32* %b, i32* %c
+; CHECK: MayAlias:  i32* %b, i32* %c
 define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) {
   %zext.plus.7 = add nsw i3 %val, 7
   %zext.plus.4 = add nsw i3 %val, 4
@@ -161,7 +161,7 @@ define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) {
 }
 
 ; CHECK-LABEL: constantOffsetHeuristic_i8_i8
-; CHECK: PartialAlias:  i32* %a, i32* %b
+; CHECK: MayAlias:  i32* %a, i32* %b
 ; CHECK: NoAlias:  i32* %a, i32* %c
 ; CHECK: NoAlias:  i32* %b, i32* %c
 define void @constantOffsetHeuristic_i8_i8(i8* %mem, i8 %val) {
diff --git a/test/Analysis/BasicAA/returned.ll b/test/Analysis/BasicAA/returned.ll
index c6ef6806140df..f0d0f1ec1fe9b 100644
--- a/test/Analysis/BasicAA/returned.ll
+++ b/test/Analysis/BasicAA/returned.ll
@@ -8,20 +8,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK-DAG: MustAlias: %struct* %st, %struct* %sta
 
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %x
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %y
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %z
+; CHECK-DAG: MayAlias: %struct* %st, i32* %x
+; CHECK-DAG: MayAlias: %struct* %st, i32* %y
+; CHECK-DAG: MayAlias: %struct* %st, i32* %z
 
 ; CHECK-DAG: NoAlias: i32* %x, i32* %y
 ; CHECK-DAG: NoAlias: i32* %x, i32* %z
 ; CHECK-DAG: NoAlias: i32* %y, i32* %z
 
-; CHECK-DAG: PartialAlias: %struct* %st, %struct* %y_12
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %st, %struct* %y_12
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
 
-; CHECK-DAG: PartialAlias: %struct* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: %struct* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
 
 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
diff --git a/test/Analysis/BasicAA/sequential-gep.ll b/test/Analysis/BasicAA/sequential-gep.ll
index c17a782aa04b6..5bedab61e17dc 100644
--- a/test/Analysis/BasicAA/sequential-gep.ll
+++ b/test/Analysis/BasicAA/sequential-gep.ll
@@ -11,7 +11,7 @@ define void @t1([8 x i32]* %p, i32 %addend, i32* %q) {
 }
 
 ; CHECK: Function: t2
-; CHECK: PartialAlias: i32* %gep1, i32* %gep2
+; CHECK: MayAlias: i32* %gep1, i32* %gep2
 define void @t2([8 x i32]* %p, i32 %addend, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %add = add nsw nuw i32 %addend, %knownnonzero
@@ -31,7 +31,7 @@ define void @t3([8 x i32]* %p, i32 %addend, i32* %q) {
 }
 
 ; CHECK: Function: t4
-; CHECK: PartialAlias: i32* %gep1, i32* %gep2
+; CHECK: MayAlias: i32* %gep1, i32* %gep2
 define void @t4([8 x i32]* %p, i32 %addend, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %add = add nsw nuw i32 %addend, %knownnonzero
@@ -41,7 +41,7 @@ define void @t4([8 x i32]* %p, i32 %addend, i32* %q) {
 }
 
 ; CHECK: Function: t5
-; CHECK: PartialAlias: i32* %gep2, i64* %bc
+; CHECK: MayAlias: i32* %gep2, i64* %bc
 define void @t5([8 x i32]* %p, i32 %addend, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %add = add nsw nuw i32 %addend, %knownnonzero
diff --git a/test/Analysis/BasicAA/struct-geps.ll b/test/Analysis/BasicAA/struct-geps.ll
index 2d85e1dd0173d..e048baf4c64a9 100644
--- a/test/Analysis/BasicAA/struct-geps.ll
+++ b/test/Analysis/BasicAA/struct-geps.ll
@@ -6,20 +6,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK-LABEL: test_simple
 
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %x
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %y
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %z
+; CHECK-DAG: MayAlias: %struct* %st, i32* %x
+; CHECK-DAG: MayAlias: %struct* %st, i32* %y
+; CHECK-DAG: MayAlias: %struct* %st, i32* %z
 
 ; CHECK-DAG: NoAlias: i32* %x, i32* %y
 ; CHECK-DAG: NoAlias: i32* %x, i32* %z
 ; CHECK-DAG: NoAlias: i32* %y, i32* %z
 
-; CHECK-DAG: PartialAlias: %struct* %st, %struct* %y_12
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %st, %struct* %y_12
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
 
-; CHECK-DAG: PartialAlias: %struct* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: %struct* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
 
 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
@@ -38,20 +38,20 @@ define void @test_simple(%struct* %st, i64 %i, i64 %j, i64 %k) {
 
 ; CHECK-LABEL: test_in_array
 
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %x
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %y
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %z
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %x
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %y
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %z
 
 ; CHECK-DAG: NoAlias: i32* %x, i32* %y
 ; CHECK-DAG: NoAlias: i32* %x, i32* %z
 ; CHECK-DAG: NoAlias: i32* %y, i32* %z
 
-; CHECK-DAG: PartialAlias: %struct* %y_12, [1 x %struct]* %st
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %y_12, [1 x %struct]* %st
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
 
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
 
 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
@@ -70,20 +70,20 @@ define void @test_in_array([1 x %struct]* %st, i64 %i, i64 %j, i64 %k, i64 %i1,
 
 ; CHECK-LABEL: test_in_3d_array
 
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %x
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %y
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %z
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %x
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %y
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %z
 
 ; CHECK-DAG: NoAlias: i32* %x, i32* %y
 ; CHECK-DAG: NoAlias: i32* %x, i32* %z
 ; CHECK-DAG: NoAlias: i32* %y, i32* %z
 
-; CHECK-DAG: PartialAlias: %struct* %y_12, [1 x [1 x [1 x %struct]]]* %st
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %y_12, [1 x [1 x [1 x %struct]]]* %st
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
 
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
 
 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
@@ -106,14 +106,14 @@ define void @test_in_3d_array([1 x [1 x [1 x %struct]]]* %st, i64 %i, i64 %j, i6
 ; CHECK-DAG: NoAlias: i32* %y, i32* %y2
 ; CHECK-DAG: NoAlias: i32* %z, i32* %z2
 
-; CHECK-DAG: PartialAlias: i32* %x, i32* %y2
-; CHECK-DAG: PartialAlias: i32* %x, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x, i32* %y2
+; CHECK-DAG: MayAlias: i32* %x, i32* %z2
 
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %y
-; CHECK-DAG: PartialAlias: i32* %y, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x2, i32* %y
+; CHECK-DAG: MayAlias: i32* %y, i32* %z2
 
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %z
-; CHECK-DAG: PartialAlias: i32* %y2, i32* %z
+; CHECK-DAG: MayAlias: i32* %x2, i32* %z
+; CHECK-DAG: MayAlias: i32* %y2, i32* %z
 
 define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64 %j, i64 %k) {
   %st2 = getelementptr %struct, %struct* %st, i32 10
@@ -128,18 +128,18 @@ define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64
 
 ; CHECK-LABEL: test_same_underlying_object_different_indices
 
-; CHECK-DAG: PartialAlias: i32* %x, i32* %x2
-; CHECK-DAG: PartialAlias: i32* %y, i32* %y2
-; CHECK-DAG: PartialAlias: i32* %z, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x, i32* %x2
+; CHECK-DAG: MayAlias: i32* %y, i32* %y2
+; CHECK-DAG: MayAlias: i32* %z, i32* %z2
 
-; CHECK-DAG: PartialAlias: i32* %x, i32* %y2
-; CHECK-DAG: PartialAlias: i32* %x, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x, i32* %y2
+; CHECK-DAG: MayAlias: i32* %x, i32* %z2
 
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %y
-; CHECK-DAG: PartialAlias: i32* %y, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x2, i32* %y
+; CHECK-DAG: MayAlias: i32* %y, i32* %z2
 
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %z
-; CHECK-DAG: PartialAlias: i32* %y2, i32* %z
+; CHECK-DAG: MayAlias: i32* %x2, i32* %z
+; CHECK-DAG: MayAlias: i32* %y2, i32* %z
 
 define void @test_same_underlying_object_different_indices(%struct* %st, i64 %i1, i64 %j1, i64 %k1, i64 %i2, i64 %k2, i64 %j2) {
   %st2 = getelementptr %struct, %struct* %st, i32 10
diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll
index 685d45be61512..f8f02353db2ec 100644
--- a/test/Analysis/BasicAA/zext.ll
+++ b/test/Analysis/BasicAA/zext.ll
@@ -69,7 +69,7 @@ for.loop.exit:
 }
 
 ; CHECK-LABEL: test_sign_extension
-; CHECK:  PartialAlias: i64* %b.i64, i8* %a
+; CHECK:  MayAlias: i64* %b.i64, i8* %a
 
 define void @test_sign_extension(i32 %p) {
   %1 = tail call i8* @malloc(i64 120)
@@ -83,7 +83,7 @@ define void @test_sign_extension(i32 %p) {
 }
 
 ; CHECK-LABEL: test_fe_tools
-; CHECK:  PartialAlias: i32* %a, i32* %b
+; CHECK:  MayAlias: i32* %a, i32* %b
 
 define void @test_fe_tools([8 x i32]* %values) {
   br label %reorder
@@ -108,7 +108,7 @@ for.loop.exit:
 @d = global i32 0, align 4
 
 ; CHECK-LABEL: test_spec2006
-; CHECK:  PartialAlias: i32** %x, i32** %y
+; CHECK:  MayAlias: i32** %x, i32** %y
 
 define void @test_spec2006() {
   %h = alloca [1 x [2 x i32*]], align 16
@@ -164,7 +164,7 @@ for.loop.exit:
 }
 
 ; CHECK-LABEL: test_modulo_analysis_with_global
-; CHECK:  PartialAlias: i32** %x, i32** %y
+; CHECK:  MayAlias: i32** %x, i32** %y
 
 define void @test_modulo_analysis_with_global() {
   %h = alloca [1 x [2 x i32*]], align 16
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index b7a615f55cdef..d9e06a3e7b411 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -1,516 +1,564 @@
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; CHECK-LABEL: 'add'
 define i32 @add(i32 %arg) {
-  ; SSSE3: cost of 1 {{.*}} %A = add
-  ; SSE42: cost of 1 {{.*}} %A = add
-  ; AVX: cost of 1 {{.*}} %A = add
-  ; AVX2: cost of 1 {{.*}} %A = add
-  ; AVX512: cost of 1 {{.*}} %A = add
-  %A = add <2 x i64> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %B = add
-  ; SSE42: cost of 2 {{.*}} %B = add
-  ; AVX: cost of 4 {{.*}} %B = add
-  ; AVX2: cost of 1 {{.*}} %B = add
-  ; AVX512: cost of 1 {{.*}} %B = add
-  %B = add <4 x i64> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %C = add
-  ; SSE42: cost of 4 {{.*}} %C = add
-  ; AVX: cost of 8 {{.*}} %C = add
-  ; AVX2: cost of 2 {{.*}} %C = add
-  ; AVX512: cost of 1 {{.*}} %C = add
-  %C = add <8 x i64> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %D = add
-  ; SSE42: cost of 1 {{.*}} %D = add
-  ; AVX: cost of 1 {{.*}} %D = add
-  ; AVX2: cost of 1 {{.*}} %D = add
-  ; AVX512: cost of 1 {{.*}} %D = add
-  %D = add <4 x i32> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %E = add
-  ; SSE42: cost of 2 {{.*}} %E = add
-  ; AVX: cost of 4 {{.*}} %E = add
-  ; AVX2: cost of 1 {{.*}} %E = add
-  ; AVX512: cost of 1 {{.*}} %E = add
-  %E = add <8 x i32> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %F = add
-  ; SSE42: cost of 4 {{.*}} %F = add
-  ; AVX: cost of 8 {{.*}} %F = add
-  ; AVX2: cost of 2 {{.*}} %F = add
-  ; AVX512: cost of 1 {{.*}} %F = add
-  %F = add <16 x i32> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %G = add
-  ; SSE42: cost of 1 {{.*}} %G = add
-  ; AVX: cost of 1 {{.*}} %G = add
-  ; AVX2: cost of 1 {{.*}} %G = add
-  ; AVX512: cost of 1 {{.*}} %G = add
-  %G = add <8 x i16> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %H = add
-  ; SSE42: cost of 2 {{.*}} %H = add
-  ; AVX: cost of 4 {{.*}} %H = add
-  ; AVX2: cost of 1 {{.*}} %H = add
-  ; AVX512: cost of 1 {{.*}} %H = add
-  %H = add <16 x i16> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %I = add
-  ; SSE42: cost of 4 {{.*}} %I = add
-  ; AVX: cost of 8 {{.*}} %I = add
-  ; AVX2: cost of 2 {{.*}} %I = add
-  ; AVX512F: cost of 2 {{.*}} %I = add
-  ; AVX512BW: cost of 1 {{.*}} %I = add
-  %I = add <32 x i16> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %J = add
-  ; SSE42: cost of 1 {{.*}} %J = add
-  ; AVX: cost of 1 {{.*}} %J = add
-  ; AVX2: cost of 1 {{.*}} %J = add
-  ; AVX512: cost of 1 {{.*}} %J = add
-  %J = add <16 x i8> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %K = add
-  ; SSE42: cost of 2 {{.*}} %K = add
-  ; AVX: cost of 4 {{.*}} %K = add
-  ; AVX2: cost of 1 {{.*}} %K = add
-  ; AVX512: cost of 1 {{.*}} %K = add
-  %K = add <32 x i8> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %L = add
-  ; SSE42: cost of 4 {{.*}} %L = add
-  ; AVX: cost of 8 {{.*}} %L = add
-  ; AVX2: cost of 2 {{.*}} %L = add
-  ; AVX512F: cost of 2 {{.*}} %L = add
-  ; AVX512BW: cost of 1 {{.*}} %L = add
-  %L = add <64 x i8> undef, undef
+  ; CHECK: cost of 1 {{.*}} %I64 = add
+  %I64 = add i64 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V2I64 = add
+  ; SSE42: cost of 1 {{.*}} %V2I64 = add
+  ; AVX: cost of 1 {{.*}} %V2I64 = add
+  ; AVX2: cost of 1 {{.*}} %V2I64 = add
+  ; AVX512: cost of 1 {{.*}} %V2I64 = add
+  %V2I64 = add <2 x i64> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V4I64 = add
+  ; SSE42: cost of 2 {{.*}} %V4I64 = add
+  ; AVX: cost of 4 {{.*}} %V4I64 = add
+  ; AVX2: cost of 1 {{.*}} %V4I64 = add
+  ; AVX512: cost of 1 {{.*}} %V4I64 = add
+  %V4I64 = add <4 x i64> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V8I64 = add
+  ; SSE42: cost of 4 {{.*}} %V8I64 = add
+  ; AVX: cost of 8 {{.*}} %V8I64 = add
+  ; AVX2: cost of 2 {{.*}} %V8I64 = add
+  ; AVX512: cost of 1 {{.*}} %V8I64 = add
+  %V8I64 = add <8 x i64> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I32 = add
+  %I32 = add i32 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V4I32 = add
+  ; SSE42: cost of 1 {{.*}} %V4I32 = add
+  ; AVX: cost of 1 {{.*}} %V4I32 = add
+  ; AVX2: cost of 1 {{.*}} %V4I32 = add
+  ; AVX512: cost of 1 {{.*}} %V4I32 = add
+  %V4I32 = add <4 x i32> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V8I32 = add
+  ; SSE42: cost of 2 {{.*}} %V8I32 = add
+  ; AVX: cost of 4 {{.*}} %V8I32 = add
+  ; AVX2: cost of 1 {{.*}} %V8I32 = add
+  ; AVX512: cost of 1 {{.*}} %V8I32 = add
+  %V8I32 = add <8 x i32> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V16I32 = add
+  ; SSE42: cost of 4 {{.*}} %V16I32 = add
+  ; AVX: cost of 8 {{.*}} %V16I32 = add
+  ; AVX2: cost of 2 {{.*}} %V16I32 = add
+  ; AVX512: cost of 1 {{.*}} %V16I32 = add
+  %V16I32 = add <16 x i32> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I16 = add
+  %I16 = add i16 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V8I16 = add
+  ; SSE42: cost of 1 {{.*}} %V8I16 = add
+  ; AVX: cost of 1 {{.*}} %V8I16 = add
+  ; AVX2: cost of 1 {{.*}} %V8I16 = add
+  ; AVX512: cost of 1 {{.*}} %V8I16 = add
+  %V8I16 = add <8 x i16> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V16I16 = add
+  ; SSE42: cost of 2 {{.*}} %V16I16 = add
+  ; AVX: cost of 4 {{.*}} %V16I16 = add
+  ; AVX2: cost of 1 {{.*}} %V16I16 = add
+  ; AVX512: cost of 1 {{.*}} %V16I16 = add
+  %V16I16 = add <16 x i16> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V32I16 = add
+  ; SSE42: cost of 4 {{.*}} %V32I16 = add
+  ; AVX: cost of 8 {{.*}} %V32I16 = add
+  ; AVX2: cost of 2 {{.*}} %V32I16 = add
+  ; AVX512F: cost of 2 {{.*}} %V32I16 = add
+  ; AVX512BW: cost of 1 {{.*}} %V32I16 = add
+  %V32I16 = add <32 x i16> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I8 = add
+  %I8 = add i8 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V16I8 = add
+  ; SSE42: cost of 1 {{.*}} %V16I8 = add
+  ; AVX: cost of 1 {{.*}} %V16I8 = add
+  ; AVX2: cost of 1 {{.*}} %V16I8 = add
+  ; AVX512: cost of 1 {{.*}} %V16I8 = add
+  %V16I8 = add <16 x i8> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V32I8 = add
+  ; SSE42: cost of 2 {{.*}} %V32I8 = add
+  ; AVX: cost of 4 {{.*}} %V32I8 = add
+  ; AVX2: cost of 1 {{.*}} %V32I8 = add
+  ; AVX512: cost of 1 {{.*}} %V32I8 = add
+  %V32I8 = add <32 x i8> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V64I8 = add
+  ; SSE42: cost of 4 {{.*}} %V64I8 = add
+  ; AVX: cost of 8 {{.*}} %V64I8 = add
+  ; AVX2: cost of 2 {{.*}} %V64I8 = add
+  ; AVX512F: cost of 2 {{.*}} %V64I8 = add
+  ; AVX512BW: cost of 1 {{.*}} %V64I8 = add
+  %V64I8 = add <64 x i8> undef, undef
 
   ret i32 undef
 }
 
 ; CHECK-LABEL: 'sub'
 define i32 @sub(i32 %arg) {
-  ; SSSE3: cost of 1 {{.*}} %A = sub
-  ; SSE42: cost of 1 {{.*}} %A = sub
-  ; AVX: cost of 1 {{.*}} %A = sub
-  ; AVX2: cost of 1 {{.*}} %A = sub
-  ; AVX512: cost of 1 {{.*}} %A = sub
-  %A = sub <2 x i64> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %B = sub
-  ; SSE42: cost of 2 {{.*}} %B = sub
-  ; AVX: cost of 4 {{.*}} %B = sub
-  ; AVX2: cost of 1 {{.*}} %B = sub
-  ; AVX512: cost of 1 {{.*}} %B = sub
-  %B = sub <4 x i64> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %C = sub
-  ; SSE42: cost of 4 {{.*}} %C = sub
-  ; AVX: cost of 8 {{.*}} %C = sub
-  ; AVX2: cost of 2 {{.*}} %C = sub
-  ; AVX512: cost of 1 {{.*}} %C = sub
-  %C = sub <8 x i64> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %D = sub
-  ; SSE42: cost of 1 {{.*}} %D = sub
-  ; AVX: cost of 1 {{.*}} %D = sub
-  ; AVX2: cost of 1 {{.*}} %D = sub
-  ; AVX512: cost of 1 {{.*}} %D = sub
-  %D = sub <4 x i32> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %E = sub
-  ; SSE42: cost of 2 {{.*}} %E = sub
-  ; AVX: cost of 4 {{.*}} %E = sub
-  ; AVX2: cost of 1 {{.*}} %E = sub
-  ; AVX512: cost of 1 {{.*}} %E = sub
-  %E = sub <8 x i32> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %F = sub
-  ; SSE42: cost of 4 {{.*}} %F = sub
-  ; AVX: cost of 8 {{.*}} %F = sub
-  ; AVX2: cost of 2 {{.*}} %F = sub
-  ; AVX512: cost of 1 {{.*}} %F = sub
-  %F = sub <16 x i32> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %G = sub
-  ; SSE42: cost of 1 {{.*}} %G = sub
-  ; AVX: cost of 1 {{.*}} %G = sub
-  ; AVX2: cost of 1 {{.*}} %G = sub
-  ; AVX512: cost of 1 {{.*}} %G = sub
-  %G = sub <8 x i16> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %H = sub
-  ; SSE42: cost of 2 {{.*}} %H = sub
-  ; AVX: cost of 4 {{.*}} %H = sub
-  ; AVX2: cost of 1 {{.*}} %H = sub
-  ; AVX512: cost of 1 {{.*}} %H = sub
-  %H = sub <16 x i16> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %I = sub
-  ; SSE42: cost of 4 {{.*}} %I = sub
-  ; AVX: cost of 8 {{.*}} %I = sub
-  ; AVX2: cost of 2 {{.*}} %I = sub
-  ; AVX512F: cost of 2 {{.*}} %I = sub
-  ; AVX512BW: cost of 1 {{.*}} %I = sub
-  %I = sub <32 x i16> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %J = sub
-  ; SSE42: cost of 1 {{.*}} %J = sub
-  ; AVX: cost of 1 {{.*}} %J = sub
-  ; AVX2: cost of 1 {{.*}} %J = sub
-  ; AVX512: cost of 1 {{.*}} %J = sub
-  %J = sub <16 x i8> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %K = sub
-  ; SSE42: cost of 2 {{.*}} %K = sub
-  ; AVX: cost of 4 {{.*}} %K = sub
-  ; AVX2: cost of 1 {{.*}} %K = sub
-  ; AVX512: cost of 1 {{.*}} %K = sub
-  %K = sub <32 x i8> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %L = sub
-  ; SSE42: cost of 4 {{.*}} %L = sub
-  ; AVX: cost of 8 {{.*}} %L = sub
-  ; AVX2: cost of 2 {{.*}} %L = sub
-  ; AVX512F: cost of 2 {{.*}} %L = sub
-  ; AVX512BW: cost of 1 {{.*}} %L = sub
-  %L = sub <64 x i8> undef, undef
+  ; CHECK: cost of 1 {{.*}} %I64 = sub
+  %I64 = sub i64 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V2I64 = sub
+  ; SSE42: cost of 1 {{.*}} %V2I64 = sub
+  ; AVX: cost of 1 {{.*}} %V2I64 = sub
+  ; AVX2: cost of 1 {{.*}} %V2I64 = sub
+  ; AVX512: cost of 1 {{.*}} %V2I64 = sub
+  %V2I64 = sub <2 x i64> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V4I64 = sub
+  ; SSE42: cost of 2 {{.*}} %V4I64 = sub
+  ; AVX: cost of 4 {{.*}} %V4I64 = sub
+  ; AVX2: cost of 1 {{.*}} %V4I64 = sub
+  ; AVX512: cost of 1 {{.*}} %V4I64 = sub
+  %V4I64 = sub <4 x i64> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V8I64 = sub
+  ; SSE42: cost of 4 {{.*}} %V8I64 = sub
+  ; AVX: cost of 8 {{.*}} %V8I64 = sub
+  ; AVX2: cost of 2 {{.*}} %V8I64 = sub
+  ; AVX512: cost of 1 {{.*}} %V8I64 = sub
+  %V8I64 = sub <8 x i64> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I32 = sub
+  %I32 = sub i32 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V4I32 = sub
+  ; SSE42: cost of 1 {{.*}} %V4I32 = sub
+  ; AVX: cost of 1 {{.*}} %V4I32 = sub
+  ; AVX2: cost of 1 {{.*}} %V4I32 = sub
+  ; AVX512: cost of 1 {{.*}} %V4I32 = sub
+  %V4I32 = sub <4 x i32> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V8I32 = sub
+  ; SSE42: cost of 2 {{.*}} %V8I32 = sub
+  ; AVX: cost of 4 {{.*}} %V8I32 = sub
+  ; AVX2: cost of 1 {{.*}} %V8I32 = sub
+  ; AVX512: cost of 1 {{.*}} %V8I32 = sub
+  %V8I32 = sub <8 x i32> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V16I32 = sub
+  ; SSE42: cost of 4 {{.*}} %V16I32 = sub
+  ; AVX: cost of 8 {{.*}} %V16I32 = sub
+  ; AVX2: cost of 2 {{.*}} %V16I32 = sub
+  ; AVX512: cost of 1 {{.*}} %V16I32 = sub
+  %V16I32 = sub <16 x i32> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I16 = sub
+  %I16 = sub i16 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V8I16 = sub
+  ; SSE42: cost of 1 {{.*}} %V8I16 = sub
+  ; AVX: cost of 1 {{.*}} %V8I16 = sub
+  ; AVX2: cost of 1 {{.*}} %V8I16 = sub
+  ; AVX512: cost of 1 {{.*}} %V8I16 = sub
+  %V8I16 = sub <8 x i16> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V16I16 = sub
+  ; SSE42: cost of 2 {{.*}} %V16I16 = sub
+  ; AVX: cost of 4 {{.*}} %V16I16 = sub
+  ; AVX2: cost of 1 {{.*}} %V16I16 = sub
+  ; AVX512: cost of 1 {{.*}} %V16I16 = sub
+  %V16I16 = sub <16 x i16> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V32I16 = sub
+  ; SSE42: cost of 4 {{.*}} %V32I16 = sub
+  ; AVX: cost of 8 {{.*}} %V32I16 = sub
+  ; AVX2: cost of 2 {{.*}} %V32I16 = sub
+  ; AVX512F: cost of 2 {{.*}} %V32I16 = sub
+  ; AVX512BW: cost of 1 {{.*}} %V32I16 = sub
+  %V32I16 = sub <32 x i16> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I8 = sub
+  %I8 = sub i8 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V16I8 = sub
+  ; SSE42: cost of 1 {{.*}} %V16I8 = sub
+  ; AVX: cost of 1 {{.*}} %V16I8 = sub
+  ; AVX2: cost of 1 {{.*}} %V16I8 = sub
+  ; AVX512: cost of 1 {{.*}} %V16I8 = sub
+  %V16I8 = sub <16 x i8> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V32I8 = sub
+  ; SSE42: cost of 2 {{.*}} %V32I8 = sub
+  ; AVX: cost of 4 {{.*}} %V32I8 = sub
+  ; AVX2: cost of 1 {{.*}} %V32I8 = sub
+  ; AVX512: cost of 1 {{.*}} %V32I8 = sub
+  %V32I8 = sub <32 x i8> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V64I8 = sub
+  ; SSE42: cost of 4 {{.*}} %V64I8 = sub
+  ; AVX: cost of 8 {{.*}} %V64I8 = sub
+  ; AVX2: cost of 2 {{.*}} %V64I8 = sub
+  ; AVX512F: cost of 2 {{.*}} %V64I8 = sub
+  ; AVX512BW: cost of 1 {{.*}} %V64I8 = sub
+  %V64I8 = sub <64 x i8> undef, undef
 
   ret i32 undef
 }
 
 ; CHECK-LABEL: 'or'
 define i32 @or(i32 %arg) {
-  ; SSSE3: cost of 1 {{.*}} %A = or
-  ; SSE42: cost of 1 {{.*}} %A = or
-  ; AVX: cost of 1 {{.*}} %A = or
-  ; AVX2: cost of 1 {{.*}} %A = or
-  ; AVX512: cost of 1 {{.*}} %A = or
-  %A = or <2 x i64> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %B = or
-  ; SSE42: cost of 2 {{.*}} %B = or
-  ; AVX: cost of 1 {{.*}} %B = or
-  ; AVX2: cost of 1 {{.*}} %B = or
-  ; AVX512: cost of 1 {{.*}} %B = or
-  %B = or <4 x i64> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %C = or
-  ; SSE42: cost of 4 {{.*}} %C = or
-  ; AVX: cost of 2 {{.*}} %C = or
-  ; AVX2: cost of 2 {{.*}} %C = or
-  ; AVX512: cost of 1 {{.*}} %C = or
-  %C = or <8 x i64> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %D = or
-  ; SSE42: cost of 1 {{.*}} %D = or
-  ; AVX: cost of 1 {{.*}} %D = or
-  ; AVX2: cost of 1 {{.*}} %D = or
-  ; AVX512: cost of 1 {{.*}} %D = or
-  %D = or <4 x i32> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %E = or
-  ; SSE42: cost of 2 {{.*}} %E = or
-  ; AVX: cost of 1 {{.*}} %E = or
-  ; AVX2: cost of 1 {{.*}} %E = or
-  ; AVX512: cost of 1 {{.*}} %E = or
-  %E = or <8 x i32> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %F = or
-  ; SSE42: cost of 4 {{.*}} %F = or
-  ; AVX: cost of 2 {{.*}} %F = or
-  ; AVX2: cost of 2 {{.*}} %F = or
-  ; AVX512: cost of 1 {{.*}} %F = or
-  %F = or <16 x i32> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %G = or
-  ; SSE42: cost of 1 {{.*}} %G = or
-  ; AVX: cost of 1 {{.*}} %G = or
-  ; AVX2: cost of 1 {{.*}} %G = or
-  ; AVX512: cost of 1 {{.*}} %G = or
-  %G = or <8 x i16> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %H = or
-  ; SSE42: cost of 2 {{.*}} %H = or
-  ; AVX: cost of 1 {{.*}} %H = or
-  ; AVX2: cost of 1 {{.*}} %H = or
-  ; AVX512: cost of 1 {{.*}} %H = or
-  %H = or <16 x i16> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %I = or
-  ; SSE42: cost of 4 {{.*}} %I = or
-  ; AVX: cost of 2 {{.*}} %I = or
-  ; AVX2: cost of 2 {{.*}} %I = or
-  ; AVX512F: cost of 2 {{.*}} %I = or
-  ; AVX512BW: cost of 1 {{.*}} %I = or
-  %I = or <32 x i16> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %J = or
-  ; SSE42: cost of 1 {{.*}} %J = or
-  ; AVX: cost of 1 {{.*}} %J = or
-  ; AVX2: cost of 1 {{.*}} %J = or
-  ; AVX512: cost of 1 {{.*}} %J = or
-  %J = or <16 x i8> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %K = or
-  ; SSE42: cost of 2 {{.*}} %K = or
-  ; AVX: cost of 1 {{.*}} %K = or
-  ; AVX2: cost of 1 {{.*}} %K = or
-  ; AVX512: cost of 1 {{.*}} %K = or
-  %K = or <32 x i8> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %L = or
-  ; SSE42: cost of 4 {{.*}} %L = or
-  ; AVX: cost of 2 {{.*}} %L = or
-  ; AVX2: cost of 2 {{.*}} %L = or
-  ; AVX512F: cost of 2 {{.*}} %L = or
-  ; AVX512BW: cost of 1 {{.*}} %L = or
-  %L = or <64 x i8> undef, undef
+  ; CHECK: cost of 1 {{.*}} %I64 = or
+  %I64 = or i64 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V2I64 = or
+  ; SSE42: cost of 1 {{.*}} %V2I64 = or
+  ; AVX: cost of 1 {{.*}} %V2I64 = or
+  ; AVX2: cost of 1 {{.*}} %V2I64 = or
+  ; AVX512: cost of 1 {{.*}} %V2I64 = or
+  %V2I64 = or <2 x i64> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V4I64 = or
+  ; SSE42: cost of 2 {{.*}} %V4I64 = or
+  ; AVX: cost of 1 {{.*}} %V4I64 = or
+  ; AVX2: cost of 1 {{.*}} %V4I64 = or
+  ; AVX512: cost of 1 {{.*}} %V4I64 = or
+  %V4I64 = or <4 x i64> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V8I64 = or
+  ; SSE42: cost of 4 {{.*}} %V8I64 = or
+  ; AVX: cost of 2 {{.*}} %V8I64 = or
+  ; AVX2: cost of 2 {{.*}} %V8I64 = or
+  ; AVX512: cost of 1 {{.*}} %V8I64 = or
+  %V8I64 = or <8 x i64> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I32 = or
+  %I32 = or i32 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V4I32 = or
+  ; SSE42: cost of 1 {{.*}} %V4I32 = or
+  ; AVX: cost of 1 {{.*}} %V4I32 = or
+  ; AVX2: cost of 1 {{.*}} %V4I32 = or
+  ; AVX512: cost of 1 {{.*}} %V4I32 = or
+  %V4I32 = or <4 x i32> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V8I32 = or
+  ; SSE42: cost of 2 {{.*}} %V8I32 = or
+  ; AVX: cost of 1 {{.*}} %V8I32 = or
+  ; AVX2: cost of 1 {{.*}} %V8I32 = or
+  ; AVX512: cost of 1 {{.*}} %V8I32 = or
+  %V8I32 = or <8 x i32> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V16I32 = or
+  ; SSE42: cost of 4 {{.*}} %V16I32 = or
+  ; AVX: cost of 2 {{.*}} %V16I32 = or
+  ; AVX2: cost of 2 {{.*}} %V16I32 = or
+  ; AVX512: cost of 1 {{.*}} %V16I32 = or
+  %V16I32 = or <16 x i32> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I16 = or
+  %I16 = or i16 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V8I16 = or
+  ; SSE42: cost of 1 {{.*}} %V8I16 = or
+  ; AVX: cost of 1 {{.*}} %V8I16 = or
+  ; AVX2: cost of 1 {{.*}} %V8I16 = or
+  ; AVX512: cost of 1 {{.*}} %V8I16 = or
+  %V8I16 = or <8 x i16> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V16I16 = or
+  ; SSE42: cost of 2 {{.*}} %V16I16 = or
+  ; AVX: cost of 1 {{.*}} %V16I16 = or
+  ; AVX2: cost of 1 {{.*}} %V16I16 = or
+  ; AVX512: cost of 1 {{.*}} %V16I16 = or
+  %V16I16 = or <16 x i16> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V32I16 = or
+  ; SSE42: cost of 4 {{.*}} %V32I16 = or
+  ; AVX: cost of 2 {{.*}} %V32I16 = or
+  ; AVX2: cost of 2 {{.*}} %V32I16 = or
+  ; AVX512F: cost of 2 {{.*}} %V32I16 = or
+  ; AVX512BW: cost of 1 {{.*}} %V32I16 = or
+  %V32I16 = or <32 x i16> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I8 = or
+  %I8 = or i8 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V16I8 = or
+  ; SSE42: cost of 1 {{.*}} %V16I8 = or
+  ; AVX: cost of 1 {{.*}} %V16I8 = or
+  ; AVX2: cost of 1 {{.*}} %V16I8 = or
+  ; AVX512: cost of 1 {{.*}} %V16I8 = or
+  %V16I8 = or <16 x i8> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V32I8 = or
+  ; SSE42: cost of 2 {{.*}} %V32I8 = or
+  ; AVX: cost of 1 {{.*}} %V32I8 = or
+  ; AVX2: cost of 1 {{.*}} %V32I8 = or
+  ; AVX512: cost of 1 {{.*}} %V32I8 = or
+  %V32I8 = or <32 x i8> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V64I8 = or
+  ; SSE42: cost of 4 {{.*}} %V64I8 = or
+  ; AVX: cost of 2 {{.*}} %V64I8 = or
+  ; AVX2: cost of 2 {{.*}} %V64I8 = or
+  ; AVX512F: cost of 2 {{.*}} %V64I8 = or
+  ; AVX512BW: cost of 1 {{.*}} %V64I8 = or
+  %V64I8 = or <64 x i8> undef, undef
 
   ret i32 undef
 }
 
 ; CHECK-LABEL: 'xor'
 define i32 @xor(i32 %arg) {
-  ; SSSE3: cost of 1 {{.*}} %A = xor
-  ; SSE42: cost of 1 {{.*}} %A = xor
-  ; AVX: cost of 1 {{.*}} %A = xor
-  ; AVX2: cost of 1 {{.*}} %A = xor
-  ; AVX512: cost of 1 {{.*}} %A = xor
-  %A = xor <2 x i64> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %B = xor
-  ; SSE42: cost of 2 {{.*}} %B = xor
-  ; AVX: cost of 1 {{.*}} %B = xor
-  ; AVX2: cost of 1 {{.*}} %B = xor
-  ; AVX512: cost of 1 {{.*}} %B = xor
-  %B = xor <4 x i64> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %C = xor
-  ; SSE42: cost of 4 {{.*}} %C = xor
-  ; AVX: cost of 2 {{.*}} %C = xor
-  ; AVX2: cost of 2 {{.*}} %C = xor
-  ; AVX512: cost of 1 {{.*}} %C = xor
-  %C = xor <8 x i64> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %D = xor
-  ; SSE42: cost of 1 {{.*}} %D = xor
-  ; AVX: cost of 1 {{.*}} %D = xor
-  ; AVX2: cost of 1 {{.*}} %D = xor
-  ; AVX512: cost of 1 {{.*}} %D = xor
-  %D = xor <4 x i32> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %E = xor
-  ; SSE42: cost of 2 {{.*}} %E = xor
-  ; AVX: cost of 1 {{.*}} %E = xor
-  ; AVX2: cost of 1 {{.*}} %E = xor
-  ; AVX512: cost of 1 {{.*}} %E = xor
-  %E = xor <8 x i32> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %F = xor
-  ; SSE42: cost of 4 {{.*}} %F = xor
-  ; AVX: cost of 2 {{.*}} %F = xor
-  ; AVX2: cost of 2 {{.*}} %F = xor
-  ; AVX512: cost of 1 {{.*}} %F = xor
-  %F = xor <16 x i32> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %G = xor
-  ; SSE42: cost of 1 {{.*}} %G = xor
-  ; AVX: cost of 1 {{.*}} %G = xor
-  ; AVX2: cost of 1 {{.*}} %G = xor
-  ; AVX512: cost of 1 {{.*}} %G = xor
-  %G = xor <8 x i16> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %H = xor
-  ; SSE42: cost of 2 {{.*}} %H = xor
-  ; AVX: cost of 1 {{.*}} %H = xor
-  ; AVX2: cost of 1 {{.*}} %H = xor
-  ; AVX512: cost of 1 {{.*}} %H = xor
-  %H = xor <16 x i16> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %I = xor
-  ; SSE42: cost of 4 {{.*}} %I = xor
-  ; AVX: cost of 2 {{.*}} %I = xor
-  ; AVX2: cost of 2 {{.*}} %I = xor
-  ; AVX512F: cost of 2 {{.*}} %I = xor
-  ; AVX512BW: cost of 1 {{.*}} %I = xor
-  %I = xor <32 x i16> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %J = xor
-  ; SSE42: cost of 1 {{.*}} %J = xor
-  ; AVX: cost of 1 {{.*}} %J = xor
-  ; AVX2: cost of 1 {{.*}} %J = xor
-  ; AVX512: cost of 1 {{.*}} %J = xor
-  %J = xor <16 x i8> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %K = xor
-  ; SSE42: cost of 2 {{.*}} %K = xor
-  ; AVX: cost of 1 {{.*}} %K = xor
-  ; AVX2: cost of 1 {{.*}} %K = xor
-  ; AVX512: cost of 1 {{.*}} %K = xor
-  %K = xor <32 x i8> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %L = xor
-  ; SSE42: cost of 4 {{.*}} %L = xor
-  ; AVX: cost of 2 {{.*}} %L = xor
-  ; AVX2: cost of 2 {{.*}} %L = xor
-  ; AVX512F: cost of 2 {{.*}} %L = xor
-  ; AVX512BW: cost of 1 {{.*}} %L = xor
-  %L = xor <64 x i8> undef, undef
+  ; CHECK: cost of 1 {{.*}} %I64 = xor
+  %I64 = xor i64 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V2I64 = xor
+  ; SSE42: cost of 1 {{.*}} %V2I64 = xor
+  ; AVX: cost of 1 {{.*}} %V2I64 = xor
+  ; AVX2: cost of 1 {{.*}} %V2I64 = xor
+  ; AVX512: cost of 1 {{.*}} %V2I64 = xor
+  %V2I64 = xor <2 x i64> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V4I64 = xor
+  ; SSE42: cost of 2 {{.*}} %V4I64 = xor
+  ; AVX: cost of 1 {{.*}} %V4I64 = xor
+  ; AVX2: cost of 1 {{.*}} %V4I64 = xor
+  ; AVX512: cost of 1 {{.*}} %V4I64 = xor
+  %V4I64 = xor <4 x i64> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V8I64 = xor
+  ; SSE42: cost of 4 {{.*}} %V8I64 = xor
+  ; AVX: cost of 2 {{.*}} %V8I64 = xor
+  ; AVX2: cost of 2 {{.*}} %V8I64 = xor
+  ; AVX512: cost of 1 {{.*}} %V8I64 = xor
+  %V8I64 = xor <8 x i64> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I32 = xor
+  %I32 = xor i32 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V4I32 = xor
+  ; SSE42: cost of 1 {{.*}} %V4I32 = xor
+  ; AVX: cost of 1 {{.*}} %V4I32 = xor
+  ; AVX2: cost of 1 {{.*}} %V4I32 = xor
+  ; AVX512: cost of 1 {{.*}} %V4I32 = xor
+  %V4I32 = xor <4 x i32> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V8I32 = xor
+  ; SSE42: cost of 2 {{.*}} %V8I32 = xor
+  ; AVX: cost of 1 {{.*}} %V8I32 = xor
+  ; AVX2: cost of 1 {{.*}} %V8I32 = xor
+  ; AVX512: cost of 1 {{.*}} %V8I32 = xor
+  %V8I32 = xor <8 x i32> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V16I32 = xor
+  ; SSE42: cost of 4 {{.*}} %V16I32 = xor
+  ; AVX: cost of 2 {{.*}} %V16I32 = xor
+  ; AVX2: cost of 2 {{.*}} %V16I32 = xor
+  ; AVX512: cost of 1 {{.*}} %V16I32 = xor
+  %V16I32 = xor <16 x i32> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I16 = xor
+  %I16 = xor i16 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V8I16 = xor
+  ; SSE42: cost of 1 {{.*}} %V8I16 = xor
+  ; AVX: cost of 1 {{.*}} %V8I16 = xor
+  ; AVX2: cost of 1 {{.*}} %V8I16 = xor
+  ; AVX512: cost of 1 {{.*}} %V8I16 = xor
+  %V8I16 = xor <8 x i16> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V16I16 = xor
+  ; SSE42: cost of 2 {{.*}} %V16I16 = xor
+  ; AVX: cost of 1 {{.*}} %V16I16 = xor
+  ; AVX2: cost of 1 {{.*}} %V16I16 = xor
+  ; AVX512: cost of 1 {{.*}} %V16I16 = xor
+  %V16I16 = xor <16 x i16> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V32I16 = xor
+  ; SSE42: cost of 4 {{.*}} %V32I16 = xor
+  ; AVX: cost of 2 {{.*}} %V32I16 = xor
+  ; AVX2: cost of 2 {{.*}} %V32I16 = xor
+  ; AVX512F: cost of 2 {{.*}} %V32I16 = xor
+  ; AVX512BW: cost of 1 {{.*}} %V32I16 = xor
+  %V32I16 = xor <32 x i16> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I8 = xor
+  %I8 = xor i8 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V16I8 = xor
+  ; SSE42: cost of 1 {{.*}} %V16I8 = xor
+  ; AVX: cost of 1 {{.*}} %V16I8 = xor
+  ; AVX2: cost of 1 {{.*}} %V16I8 = xor
+  ; AVX512: cost of 1 {{.*}} %V16I8 = xor
+  %V16I8 = xor <16 x i8> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V32I8 = xor
+  ; SSE42: cost of 2 {{.*}} %V32I8 = xor
+  ; AVX: cost of 1 {{.*}} %V32I8 = xor
+  ; AVX2: cost of 1 {{.*}} %V32I8 = xor
+  ; AVX512: cost of 1 {{.*}} %V32I8 = xor
+  %V32I8 = xor <32 x i8> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V64I8 = xor
+  ; SSE42: cost of 4 {{.*}} %V64I8 = xor
+  ; AVX: cost of 2 {{.*}} %V64I8 = xor
+  ; AVX2: cost of 2 {{.*}} %V64I8 = xor
+  ; AVX512F: cost of 2 {{.*}} %V64I8 = xor
+  ; AVX512BW: cost of 1 {{.*}} %V64I8 = xor
+  %V64I8 = xor <64 x i8> undef, undef
 
   ret i32 undef
 }
 
 ; CHECK-LABEL: 'and'
 define i32 @and(i32 %arg) {
-  ; SSSE3: cost of 1 {{.*}} %A = and
-  ; SSE42: cost of 1 {{.*}} %A = and
-  ; AVX: cost of 1 {{.*}} %A = and
-  ; AVX2: cost of 1 {{.*}} %A = and
-  ; AVX512: cost of 1 {{.*}} %A = and
-  %A = and <2 x i64> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %B = and
-  ; SSE42: cost of 2 {{.*}} %B = and
-  ; AVX: cost of 1 {{.*}} %B = and
-  ; AVX2: cost of 1 {{.*}} %B = and
-  ; AVX512: cost of 1 {{.*}} %B = and
-  %B = and <4 x i64> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %C = and
-  ; SSE42: cost of 4 {{.*}} %C = and
-  ; AVX: cost of 2 {{.*}} %C = and
-  ; AVX2: cost of 2 {{.*}} %C = and
-  ; AVX512: cost of 1 {{.*}} %C = and
-  %C = and <8 x i64> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %D = and
-  ; SSE42: cost of 1 {{.*}} %D = and
-  ; AVX: cost of 1 {{.*}} %D = and
-  ; AVX2: cost of 1 {{.*}} %D = and
-  ; AVX512: cost of 1 {{.*}} %D = and
-  %D = and <4 x i32> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %E = and
-  ; SSE42: cost of 2 {{.*}} %E = and
-  ; AVX: cost of 1 {{.*}} %E = and
-  ; AVX2: cost of 1 {{.*}} %E = and
-  ; AVX512: cost of 1 {{.*}} %E = and
-  %E = and <8 x i32> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %F = and
-  ; SSE42: cost of 4 {{.*}} %F = and
-  ; AVX: cost of 2 {{.*}} %F = and
-  ; AVX2: cost of 2 {{.*}} %F = and
-  ; AVX512: cost of 1 {{.*}} %F = and
-  %F = and <16 x i32> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %G = and
-  ; SSE42: cost of 1 {{.*}} %G = and
-  ; AVX: cost of 1 {{.*}} %G = and
-  ; AVX2: cost of 1 {{.*}} %G = and
-  ; AVX512: cost of 1 {{.*}} %G = and
-  %G = and <8 x i16> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %H = and
-  ; SSE42: cost of 2 {{.*}} %H = and
-  ; AVX: cost of 1 {{.*}} %H = and
-  ; AVX2: cost of 1 {{.*}} %H = and
-  ; AVX512: cost of 1 {{.*}} %H = and
-  %H = and <16 x i16> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %I = and
-  ; SSE42: cost of 4 {{.*}} %I = and
-  ; AVX: cost of 2 {{.*}} %I = and
-  ; AVX2: cost of 2 {{.*}} %I = and
-  ; AVX512F: cost of 2 {{.*}} %I = and
-  ; AVX512BW: cost of 1 {{.*}} %I = and
-  %I = and <32 x i16> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %J = and
-  ; SSE42: cost of 1 {{.*}} %J = and
-  ; AVX: cost of 1 {{.*}} %J = and
-  ; AVX2: cost of 1 {{.*}} %J = and
-  ; AVX512: cost of 1 {{.*}} %J = and
-  %J = and <16 x i8> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %K = and
-  ; SSE42: cost of 2 {{.*}} %K = and
-  ; AVX: cost of 1 {{.*}} %K = and
-  ; AVX2: cost of 1 {{.*}} %K = and
-  ; AVX512: cost of 1 {{.*}} %K = and
-  %K = and <32 x i8> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %L = and
-  ; SSE42: cost of 4 {{.*}} %L = and
-  ; AVX: cost of 2 {{.*}} %L = and
-  ; AVX2: cost of 2 {{.*}} %L = and
-  ; AVX512F: cost of 2 {{.*}} %L = and
-  ; AVX512BW: cost of 1 {{.*}} %L = and
-  %L = and <64 x i8> undef, undef
+  ; CHECK: cost of 1 {{.*}} %I64 = and
+  %I64 = and i64 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V2I64 = and
+  ; SSE42: cost of 1 {{.*}} %V2I64 = and
+  ; AVX: cost of 1 {{.*}} %V2I64 = and
+  ; AVX2: cost of 1 {{.*}} %V2I64 = and
+  ; AVX512: cost of 1 {{.*}} %V2I64 = and
+  %V2I64 = and <2 x i64> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V4I64 = and
+  ; SSE42: cost of 2 {{.*}} %V4I64 = and
+  ; AVX: cost of 1 {{.*}} %V4I64 = and
+  ; AVX2: cost of 1 {{.*}} %V4I64 = and
+  ; AVX512: cost of 1 {{.*}} %V4I64 = and
+  %V4I64 = and <4 x i64> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V8I64 = and
+  ; SSE42: cost of 4 {{.*}} %V8I64 = and
+  ; AVX: cost of 2 {{.*}} %V8I64 = and
+  ; AVX2: cost of 2 {{.*}} %V8I64 = and
+  ; AVX512: cost of 1 {{.*}} %V8I64 = and
+  %V8I64 = and <8 x i64> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I32 = and
+  %I32 = and i32 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V4I32 = and
+  ; SSE42: cost of 1 {{.*}} %V4I32 = and
+  ; AVX: cost of 1 {{.*}} %V4I32 = and
+  ; AVX2: cost of 1 {{.*}} %V4I32 = and
+  ; AVX512: cost of 1 {{.*}} %V4I32 = and
+  %V4I32 = and <4 x i32> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V8I32 = and
+  ; SSE42: cost of 2 {{.*}} %V8I32 = and
+  ; AVX: cost of 1 {{.*}} %V8I32 = and
+  ; AVX2: cost of 1 {{.*}} %V8I32 = and
+  ; AVX512: cost of 1 {{.*}} %V8I32 = and
+  %V8I32 = and <8 x i32> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V16I32 = and
+  ; SSE42: cost of 4 {{.*}} %V16I32 = and
+  ; AVX: cost of 2 {{.*}} %V16I32 = and
+  ; AVX2: cost of 2 {{.*}} %V16I32 = and
+  ; AVX512: cost of 1 {{.*}} %V16I32 = and
+  %V16I32 = and <16 x i32> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I16 = and
+  %I16 = and i16 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V8I16 = and
+  ; SSE42: cost of 1 {{.*}} %V8I16 = and
+  ; AVX: cost of 1 {{.*}} %V8I16 = and
+  ; AVX2: cost of 1 {{.*}} %V8I16 = and
+  ; AVX512: cost of 1 {{.*}} %V8I16 = and
+  %V8I16 = and <8 x i16> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V16I16 = and
+  ; SSE42: cost of 2 {{.*}} %V16I16 = and
+  ; AVX: cost of 1 {{.*}} %V16I16 = and
+  ; AVX2: cost of 1 {{.*}} %V16I16 = and
+  ; AVX512: cost of 1 {{.*}} %V16I16 = and
+  %V16I16 = and <16 x i16> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V32I16 = and
+  ; SSE42: cost of 4 {{.*}} %V32I16 = and
+  ; AVX: cost of 2 {{.*}} %V32I16 = and
+  ; AVX2: cost of 2 {{.*}} %V32I16 = and
+  ; AVX512F: cost of 2 {{.*}} %V32I16 = and
+  ; AVX512BW: cost of 1 {{.*}} %V32I16 = and
+  %V32I16 = and <32 x i16> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I8 = and
+  %I8 = and i8 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V16I8 = and
+  ; SSE42: cost of 1 {{.*}} %V16I8 = and
+  ; AVX: cost of 1 {{.*}} %V16I8 = and
+  ; AVX2: cost of 1 {{.*}} %V16I8 = and
+  ; AVX512: cost of 1 {{.*}} %V16I8 = and
+  %V16I8 = and <16 x i8> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V32I8 = and
+  ; SSE42: cost of 2 {{.*}} %V32I8 = and
+  ; AVX: cost of 1 {{.*}} %V32I8 = and
+  ; AVX2: cost of 1 {{.*}} %V32I8 = and
+  ; AVX512: cost of 1 {{.*}} %V32I8 = and
+  %V32I8 = and <32 x i8> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V64I8 = and
+  ; SSE42: cost of 4 {{.*}} %V64I8 = and
+  ; AVX: cost of 2 {{.*}} %V64I8 = and
+  ; AVX2: cost of 2 {{.*}} %V64I8 = and
+  ; AVX512F: cost of 2 {{.*}} %V64I8 = and
+  ; AVX512BW: cost of 1 {{.*}} %V64I8 = and
+  %V64I8 = and <64 x i8> undef, undef
 
   ret i32 undef
 }
 
 ; CHECK-LABEL: 'mul'
 define i32 @mul(i32 %arg) {
-  ; SSSE3: cost of 8 {{.*}} %A = mul
-  ; SSE42: cost of 8 {{.*}} %A = mul
-  ; AVX: cost of 8 {{.*}} %A = mul
-  ; AVX2: cost of 8 {{.*}} %A = mul
-  ; AVX512F: cost of 8 {{.*}} %A = mul
-  ; AVX512BW: cost of 8 {{.*}} %A = mul
-  ; AVX512DQ: cost of 1 {{.*}} %A = mul
-  %A = mul <2 x i64> undef, undef
-  ; SSSE3: cost of 16 {{.*}} %B = mul
-  ; SSE42: cost of 16 {{.*}} %B = mul
-  ; AVX: cost of 18 {{.*}} %B = mul
-  ; AVX2: cost of 8 {{.*}} %B = mul
-  ; AVX512F: cost of 8 {{.*}} %B = mul
-  ; AVX512BW: cost of 8 {{.*}} %B = mul
-  ; AVX512DQ: cost of 1 {{.*}} %B = mul
-  %B = mul <4 x i64> undef, undef
-  ; SSSE3: cost of 32 {{.*}} %C = mul
-  ; SSE42: cost of 32 {{.*}} %C = mul
-  ; AVX: cost of 36 {{.*}} %C = mul
-  ; AVX2: cost of 16 {{.*}} %C = mul
-  ; AVX512F: cost of 8 {{.*}} %C = mul
-  ; AVX512BW: cost of 8 {{.*}} %C = mul
-  ; AVX512DQ: cost of 1 {{.*}} %C = mul
-  %C = mul <8 x i64> undef, undef
-
-  ; SSSE3: cost of 6 {{.*}} %D = mul
-  ; SSE42: cost of 1 {{.*}} %D = mul
-  ; AVX: cost of 1 {{.*}} %D = mul
-  ; AVX2: cost of 1 {{.*}} %D = mul
-  ; AVX512: cost of 1 {{.*}} %D = mul
-  %D = mul <4 x i32> undef, undef
-  ; SSSE3: cost of 12 {{.*}} %E = mul
-  ; SSE42: cost of 2 {{.*}} %E = mul
-  ; AVX: cost of 4 {{.*}} %E = mul
-  ; AVX2: cost of 1 {{.*}} %E = mul
-  ; AVX512: cost of 1 {{.*}} %E = mul
-  %E = mul <8 x i32> undef, undef
-  ; SSSE3: cost of 24 {{.*}} %F = mul
-  ; SSE42: cost of 4 {{.*}} %F = mul
-  ; AVX: cost of 8 {{.*}} %F = mul
-  ; AVX2: cost of 2 {{.*}} %F = mul
-  ; AVX512: cost of 1 {{.*}} %F = mul
-  %F = mul <16 x i32> undef, undef
-
-  ; SSSE3: cost of 1 {{.*}} %G = mul
-  ; SSE42: cost of 1 {{.*}} %G = mul
-  ; AVX: cost of 1 {{.*}} %G = mul
-  ; AVX2: cost of 1 {{.*}} %G = mul
-  ; AVX512: cost of 1 {{.*}} %G = mul
-  %G = mul <8 x i16> undef, undef
-  ; SSSE3: cost of 2 {{.*}} %H = mul
-  ; SSE42: cost of 2 {{.*}} %H = mul
-  ; AVX: cost of 4 {{.*}} %H = mul
-  ; AVX2: cost of 1 {{.*}} %H = mul
-  ; AVX512: cost of 1 {{.*}} %H = mul
-  %H = mul <16 x i16> undef, undef
-  ; SSSE3: cost of 4 {{.*}} %I = mul
-  ; SSE42: cost of 4 {{.*}} %I = mul
-  ; AVX: cost of 8 {{.*}} %I = mul
-  ; AVX2: cost of 2 {{.*}} %I = mul
-  ; AVX512F: cost of 2 {{.*}} %I = mul
-  ; AVX512BW: cost of 1 {{.*}} %I = mul
-  %I = mul <32 x i16> undef, undef
-
-  ; SSSE3: cost of 12 {{.*}} %J = mul
-  ; SSE42: cost of 12 {{.*}} %J = mul
-  ; AVX: cost of 12 {{.*}} %J = mul
-  ; AVX2: cost of 7 {{.*}} %J = mul
-  ; AVX512F: cost of 5 {{.*}} %J = mul
-  ; AVX512BW: cost of 4 {{.*}} %J = mul
-  %J = mul <16 x i8> undef, undef
-  ; SSSE3: cost of 24 {{.*}} %K = mul
-  ; SSE42: cost of 24 {{.*}} %K = mul
-  ; AVX: cost of 26 {{.*}} %K = mul
-  ; AVX2: cost of 17 {{.*}} %K = mul
-  ; AVX512F: cost of 13 {{.*}} %K = mul
-  ; AVX512BW: cost of 4 {{.*}} %K = mul
-  %K = mul <32 x i8> undef, undef
-  ; SSSE3: cost of 48 {{.*}} %L = mul
-  ; SSE42: cost of 48 {{.*}} %L = mul
-  ; AVX: cost of 52 {{.*}} %L = mul
-  ; AVX2: cost of 34 {{.*}} %L = mul
-  ; AVX512F: cost of 26 {{.*}} %L = mul
-  ; AVX512BW: cost of 11 {{.*}} %L = mul
-  %L = mul <64 x i8> undef, undef
+  ; CHECK: cost of 1 {{.*}} %I64 = mul
+  %I64 = mul i64 undef, undef
+  ; SSSE3: cost of 8 {{.*}} %V2I64 = mul
+  ; SSE42: cost of 8 {{.*}} %V2I64 = mul
+  ; AVX: cost of 8 {{.*}} %V2I64 = mul
+  ; AVX2: cost of 8 {{.*}} %V2I64 = mul
+  ; AVX512F: cost of 8 {{.*}} %V2I64 = mul
+  ; AVX512BW: cost of 8 {{.*}} %V2I64 = mul
+  ; AVX512DQ: cost of 1 {{.*}} %V2I64 = mul
+  %V2I64 = mul <2 x i64> undef, undef
+  ; SSSE3: cost of 16 {{.*}} %V4I64 = mul
+  ; SSE42: cost of 16 {{.*}} %V4I64 = mul
+  ; AVX: cost of 18 {{.*}} %V4I64 = mul
+  ; AVX2: cost of 8 {{.*}} %V4I64 = mul
+  ; AVX512F: cost of 8 {{.*}} %V4I64 = mul
+  ; AVX512BW: cost of 8 {{.*}} %V4I64 = mul
+  ; AVX512DQ: cost of 1 {{.*}} %V4I64 = mul
+  %V4I64 = mul <4 x i64> undef, undef
+  ; SSSE3: cost of 32 {{.*}} %V8I64 = mul
+  ; SSE42: cost of 32 {{.*}} %V8I64 = mul
+  ; AVX: cost of 36 {{.*}} %V8I64 = mul
+  ; AVX2: cost of 16 {{.*}} %V8I64 = mul
+  ; AVX512F: cost of 8 {{.*}} %V8I64 = mul
+  ; AVX512BW: cost of 8 {{.*}} %V8I64 = mul
+  ; AVX512DQ: cost of 1 {{.*}} %V8I64 = mul
+  %V8I64 = mul <8 x i64> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I32 = mul
+  %I32 = mul i32 undef, undef
+  ; SSSE3: cost of 6 {{.*}} %V4I32 = mul
+  ; SSE42: cost of 1 {{.*}} %V4I32 = mul
+  ; AVX: cost of 1 {{.*}} %V4I32 = mul
+  ; AVX2: cost of 1 {{.*}} %V4I32 = mul
+  ; AVX512: cost of 1 {{.*}} %V4I32 = mul
+  %V4I32 = mul <4 x i32> undef, undef
+  ; SSSE3: cost of 12 {{.*}} %V8I32 = mul
+  ; SSE42: cost of 2 {{.*}} %V8I32 = mul
+  ; AVX: cost of 4 {{.*}} %V8I32 = mul
+  ; AVX2: cost of 1 {{.*}} %V8I32 = mul
+  ; AVX512: cost of 1 {{.*}} %V8I32 = mul
+  %V8I32 = mul <8 x i32> undef, undef
+  ; SSSE3: cost of 24 {{.*}} %V16I32 = mul
+  ; SSE42: cost of 4 {{.*}} %V16I32 = mul
+  ; AVX: cost of 8 {{.*}} %V16I32 = mul
+  ; AVX2: cost of 2 {{.*}} %V16I32 = mul
+  ; AVX512: cost of 1 {{.*}} %V16I32 = mul
+  %V16I32 = mul <16 x i32> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I16 = mul
+  %I16 = mul i16 undef, undef
+  ; SSSE3: cost of 1 {{.*}} %V8I16 = mul
+  ; SSE42: cost of 1 {{.*}} %V8I16 = mul
+  ; AVX: cost of 1 {{.*}} %V8I16 = mul
+  ; AVX2: cost of 1 {{.*}} %V8I16 = mul
+  ; AVX512: cost of 1 {{.*}} %V8I16 = mul
+  %V8I16 = mul <8 x i16> undef, undef
+  ; SSSE3: cost of 2 {{.*}} %V16I16 = mul
+  ; SSE42: cost of 2 {{.*}} %V16I16 = mul
+  ; AVX: cost of 4 {{.*}} %V16I16 = mul
+  ; AVX2: cost of 1 {{.*}} %V16I16 = mul
+  ; AVX512: cost of 1 {{.*}} %V16I16 = mul
+  %V16I16 = mul <16 x i16> undef, undef
+  ; SSSE3: cost of 4 {{.*}} %V32I16 = mul
+  ; SSE42: cost of 4 {{.*}} %V32I16 = mul
+  ; AVX: cost of 8 {{.*}} %V32I16 = mul
+  ; AVX2: cost of 2 {{.*}} %V32I16 = mul
+  ; AVX512F: cost of 2 {{.*}} %V32I16 = mul
+  ; AVX512BW: cost of 1 {{.*}} %V32I16 = mul
+  %V32I16 = mul <32 x i16> undef, undef
+
+  ; CHECK: cost of 1 {{.*}} %I8 = mul
+  %I8 = mul i8 undef, undef
+  ; SSSE3: cost of 12 {{.*}} %V16I8 = mul
+  ; SSE42: cost of 12 {{.*}} %V16I8 = mul
+  ; AVX: cost of 12 {{.*}} %V16I8 = mul
+  ; AVX2: cost of 7 {{.*}} %V16I8 = mul
+  ; AVX512F: cost of 5 {{.*}} %V16I8 = mul
+  ; AVX512BW: cost of 4 {{.*}} %V16I8 = mul
+  %V16I8 = mul <16 x i8> undef, undef
+  ; SSSE3: cost of 24 {{.*}} %V32I8 = mul
+  ; SSE42: cost of 24 {{.*}} %V32I8 = mul
+  ; AVX: cost of 26 {{.*}} %V32I8 = mul
+  ; AVX2: cost of 17 {{.*}} %V32I8 = mul
+  ; AVX512F: cost of 13 {{.*}} %V32I8 = mul
+  ; AVX512BW: cost of 4 {{.*}} %V32I8 = mul
+  %V32I8 = mul <32 x i8> undef, undef
+  ; SSSE3: cost of 48 {{.*}} %V64I8 = mul
+  ; SSE42: cost of 48 {{.*}} %V64I8 = mul
+  ; AVX: cost of 52 {{.*}} %V64I8 = mul
+  ; AVX2: cost of 34 {{.*}} %V64I8 = mul
+  ; AVX512F: cost of 26 {{.*}} %V64I8 = mul
+  ; AVX512BW: cost of 11 {{.*}} %V64I8 = mul
+  %V64I8 = mul <64 x i8> undef, undef
 
   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/interleaved-load-i8.ll b/test/Analysis/CostModel/X86/interleaved-load-i8.ll
new file mode 100644
index 0000000000000..382e5e5301d69
--- /dev/null
+++ b/test/Analysis/CostModel/X86/interleaved-load-i8.ll
@@ -0,0 +1,98 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=core-avx2 --debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind readonly uwtable
+define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels)  { ; sums three adjacent i8 loads per iteration; the pointer advances by 3 bytes
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 20 for VF 16 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 45 for VF 32 For instruction:   %0 = load i8
+entry:                                            ; directives above match the vectorizer's debug cost line for the first load (%0) at VF 1..32
+  %cmp13 = icmp sgt i32 %Nels, 0                  ; skip the loop entirely when %Nels <= 0
+  br i1 %cmp13, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %Ptr.addr.016 = phi i8* [ %incdec.ptr2, %for.body ], [ %Ptr, %for.body.preheader ] ; current base pointer
+  %i.015 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] ; iteration counter
+  %s.014 = phi i32 [ %add6, %for.body ], [ 0, %for.body.preheader ] ; running sum, starts at 0
+  %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 1
+  %0 = load i8, i8* %Ptr.addr.016, align 1 ; byte at offset 0 (the instruction named in the directives)
+  %incdec.ptr1 = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 2
+  %1 = load i8, i8* %incdec.ptr, align 1 ; byte at offset 1
+  %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 3 ; base pointer for the next iteration (fed back through the phi)
+  %2 = load i8, i8* %incdec.ptr1, align 1 ; byte at offset 2
+  %conv = zext i8 %0 to i32
+  %conv3 = zext i8 %1 to i32
+  %conv4 = zext i8 %2 to i32
+  %add = add i32 %s.014, %conv
+  %add5 = add i32 %add, %conv3
+  %add6 = add i32 %add5, %conv4 ; sum after adding all three zero-extended bytes
+  %inc = add nuw nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, %Nels ; leave the loop after %Nels iterations
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add6.lcssa = phi i32 [ %add6, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %s.0.lcssa = phi i32 [ 0, %entry ], [ %add6.lcssa, %for.end.loopexit ] ; 0 when the loop was skipped, otherwise the final sum
+  ret i32 %s.0.lcssa
+}
+
+; Function Attrs: norecurse nounwind readonly uwtable
+define i32 @doit_stride4(i8* nocapture readonly %Ptr, i32 %Nels) local_unnamed_addr { ; loads four adjacent i8 values per iteration; the pointer advances by 4 bytes
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 21 for VF 8 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 41 for VF 16 For instruction:   %0 = load i8
+;CHECK: LV: Found an estimated cost of 84 for VF 32 For instruction:   %0 = load i8
+entry:                                            ; directives above match the vectorizer's debug cost line for the first load (%0) at VF 1..32
+  %cmp59 = icmp sgt i32 %Nels, 0                  ; skip the loop entirely when %Nels <= 0
+  br i1 %cmp59, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %Ptr.addr.062 = phi i8* [ %incdec.ptr3, %for.body ], [ %Ptr, %for.body.preheader ] ; current base pointer
+  %i.061 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] ; iteration counter
+  %s.060 = phi i32 [ %cond39, %for.body ], [ 0, %for.body.preheader ] ; running result, starts at 0
+  %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 1
+  %0 = load i8, i8* %Ptr.addr.062, align 1 ; byte at offset 0 (the instruction named in the directives)
+  %incdec.ptr1 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 2
+  %1 = load i8, i8* %incdec.ptr, align 1 ; byte at offset 1
+  %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 3
+  %2 = load i8, i8* %incdec.ptr1, align 1 ; byte at offset 2
+  %incdec.ptr3 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 4 ; base pointer for the next iteration (fed back through the phi)
+  %3 = load i8, i8* %incdec.ptr2, align 1 ; byte at offset 3
+  %cmp5 = icmp ult i8 %0, %1
+  %.sink = select i1 %cmp5, i8 %0, i8 %1 ; unsigned minimum of %0 and %1
+  %cmp12 = icmp ult i8 %.sink, %2
+  %.sink40 = select i1 %cmp12, i8 %.sink, i8 %2 ; unsigned minimum of the first three bytes
+  %cmp23 = icmp ult i8 %.sink40, %3
+  %.sink41 = select i1 %cmp23, i8 %.sink40, i8 %3 ; unsigned minimum of all four bytes
+  %conv28 = zext i8 %.sink41 to i32
+  %cmp33 = icmp slt i32 %s.060, %conv28
+  %cond39 = select i1 %cmp33, i32 %s.060, i32 %conv28 ; signed minimum of the running result and this iteration's byte minimum
+  %inc = add nuw nsw i32 %i.061, 1
+  %exitcond = icmp eq i32 %inc, %Nels ; leave the loop after %Nels iterations
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  %cond39.lcssa = phi i32 [ %cond39, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %s.0.lcssa = phi i32 [ 0, %entry ], [ %cond39.lcssa, %for.end.loopexit ] ; 0 when the loop was skipped, otherwise the final running result
+  ret i32 %s.0.lcssa
+}
diff --git a/test/Analysis/CostModel/X86/interleaved-store-i8.ll b/test/Analysis/CostModel/X86/interleaved-store-i8.ll
new file mode 100644
index 0000000000000..d8408c1527633
--- /dev/null
+++ b/test/Analysis/CostModel/X86/interleaved-store-i8.ll
@@ -0,0 +1,85 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=core-avx2 --debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Stride-3 interleaved i8 stores (three adjacent bytes per iteration); the directives below pin the cost reported for the last store at each VF.
+define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %conv4
+;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction:   store i8 %conv4
+;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction:   store i8 %conv4
+;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction:   store i8 %conv4
+;CHECK: LV: Found an estimated cost of 19 for VF 16 For instruction:   store i8 %conv4
+;CHECK: LV: Found an estimated cost of 35 for VF 32 For instruction:   store i8 %conv4
+entry:
+  %cmp14 = icmp sgt i32 %Nels, 0
+  br i1 %cmp14, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %conv = trunc i32 %Nels to i8
+  %conv1 = shl i8 %conv, 1
+  %conv4 = shl i8 %conv, 2
+  br label %for.body
+
+for.body:
+  %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %Ptr.addr.015 = phi i8* [ %Ptr, %for.body.lr.ph ], [ %incdec.ptr5, %for.body ]
+  %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 1
+  store i8 %conv, i8* %Ptr.addr.015, align 1
+  %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 2
+  store i8 %conv1, i8* %incdec.ptr, align 1
+  %incdec.ptr5 = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 3
+  store i8 %conv4, i8* %incdec.ptr2, align 1
+  %inc = add nuw nsw i32 %i.016, 1
+  %exitcond = icmp eq i32 %inc, %Nels
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; Stride-4 interleaved i8 stores (four adjacent bytes per iteration); the directives below pin the cost reported for the last store at each VF.
+define void @doit_stride4(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %conv7
+;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction:   store i8 %conv7
+;CHECK: LV: Found an estimated cost of 10 for VF 4 For instruction:   store i8 %conv7
+;CHECK: LV: Found an estimated cost of 17 for VF 8 For instruction:   store i8 %conv7
+;CHECK: LV: Found an estimated cost of 22 for VF 16 For instruction:   store i8 %conv7
+;CHECK: LV: Found an estimated cost of 44 for VF 32 For instruction:   store i8 %conv7
+entry:
+  %cmp19 = icmp sgt i32 %Nels, 0
+  br i1 %cmp19, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %conv = trunc i32 %Nels to i8
+  %conv1 = shl i8 %conv, 1
+  %conv4 = shl i8 %conv, 2
+  %mul6 = mul nsw i32 %Nels, 5
+  %conv7 = trunc i32 %mul6 to i8
+  br label %for.body
+
+for.body:
+  %i.021 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %Ptr.addr.020 = phi i8* [ %Ptr, %for.body.lr.ph ], [ %incdec.ptr8, %for.body ]
+  %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 1
+  store i8 %conv, i8* %Ptr.addr.020, align 1
+  %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 2
+  store i8 %conv1, i8* %incdec.ptr, align 1
+  %incdec.ptr5 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 3
+  store i8 %conv4, i8* %incdec.ptr2, align 1
+  %incdec.ptr8 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 4
+  store i8 %conv7, i8* %incdec.ptr5, align 1
+  %inc = add nuw nsw i32 %i.021, 1
+  %exitcond = icmp eq i32 %inc, %Nels
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
index 3a0ab0f03b995..e797b377556e9 100644
--- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
+++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
@@ -100,3 +100,90 @@ backedge:
 exit:
   ret i8 0
 }
+
+; The cont block is merged into the do block. Make sure the LVI info cached for
+; cont is not reused as the LVI info at the beginning of the merged do block: %i
+; is Range[0,1) at the beginning of cont, which is incorrect at the start of do.
+define i32 @test3(i32 %i, i1 %f, i32 %n) {
+; CHECK-LABEL: LVI for function 'test3':
+; CHECK-LABEL: entry
+; CHECK:  ; LatticeVal for: 'i32 %i' is: overdefined
+; CHECK: %c = icmp ne i32 %i, -2134 
+; CHECK: br i1 %c, label %cont, label %exit
+entry:
+  %c = icmp ne i32 %i, -2134
+  br i1 %c, label %do, label %exit
+
+exit:
+  %c1 = icmp ne i32 %i, -42
+  br i1 %c1, label %exit2, label %exit
+
+; CHECK-LABEL: cont:
+; Here cont has been merged into do, so %i can be any value except -2134;
+; in particular, %i is not known to be the single value zero.
+; CHECK-NOT:  ; LatticeVal for: 'i32 %i' is: constantrange<0, 1>
+; CHECK:      ; LatticeVal for: 'i32 %i' is: constantrange<-2133, -2134>
+; CHECK:      ; LatticeVal for: '  %cond.0 = icmp sgt i32 %i, 0' in BB: '%cont' is: overdefined
+; CHECK:   %cond.0 = icmp sgt i32 %i, 0
+; CHECK:   %consume = call i32 @consume
+; CHECK:   %cond = icmp eq i32 %i, 0
+; CHECK:   call void (i1, ...) @llvm.experimental.guard(i1 %cond)
+; CHECK:   %cond.3 = icmp sgt i32 %i, %n
+; CHECK:   br i1 %cond.3, label %exit2, label %exit
+cont:
+  %cond.3 = icmp sgt i32 %i, %n
+  br i1 %cond.3, label %exit2, label %exit
+
+do:
+  %cond.0 = icmp sgt i32 %i, 0
+  %consume = call i32 @consume(i1 %cond.0)
+  %cond = icmp eq i32 %i, 0
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %cond.2 = icmp sgt i32 %i, 0
+  br i1 %cond.2, label %exit, label %cont
+  
+exit2:
+; CHECK-LABEL: exit2:
+; LatticeVal for: 'i32 %i' is: constantrange<-2134, 1>  (NOTE(review): no FileCheck prefix here, so presumably unverified; confirm intent)
+  ret i32 30
+}
+
+; FIXME: We should be able to merge cont into do.
+; When we do so, the LVI info for cont must not be reused for the merged do block.
+define i32 @test4(i32 %i, i1 %f, i32 %n) {
+; CHECK-LABEL: LVI for function 'test4':
+entry:
+  %c = icmp ne i32 %i, -2134
+  br i1 %c, label %do, label %exit
+
+exit:                                             ; preds = %do, %cont, %exit, %entry
+  %c1 = icmp ne i32 %i, -42
+  br i1 %c1, label %exit2, label %exit
+
+cont:                                             ; preds = %do
+; CHECK-LABEL: cont:
+; CHECK:  ; LatticeVal for: 'i1 %f' is: constantrange<-1, 0>
+; CHECK: call void @dummy(i1 %f)
+  call void @dummy(i1 %f)
+  br label %exit2
+
+do:                                               ; preds = %entry
+; CHECK-LABEL: do:
+; CHECK:  ; LatticeVal for: 'i1 %f' is: overdefined
+; CHECK: call void @dummy(i1 %f)
+; CHECK: br i1 %cond, label %exit, label %cont
+  call void @dummy(i1 %f)
+  %consume = call i32 @exit()
+  call void @llvm.assume(i1 %f)
+  %cond = icmp eq i1 %f, false
+  br i1 %cond, label %exit, label %cont
+
+exit2:                                            ; preds = %cont, %exit
+  ret i32 30
+}
+
+declare i32 @exit()
+declare i32 @consume(i1)
+declare void @llvm.assume(i1) nounwind
+declare void @dummy(i1) nounwind
+declare void @llvm.experimental.guard(i1, ...)
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
index 455968d7a4017..8ac6301f93184 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -26,21 +26,21 @@ for.body:                                         ; preds = %entry, %for.body
   %idxprom = sext i32 %sub to i64
   %half = bitcast %union.vector_t* %vb to [8 x i16]*
   %arrayidx = getelementptr inbounds [8 x i16], [8 x i16]* %half, i64 0, i64 %idxprom
-  %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !0
+  %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !10
   %conv = zext i16 %tmp4 to i32
   %and = and i32 %conv, 15
   %sub6 = sub nsw i32 7, %i.01
   %idxprom7 = sext i32 %sub6 to i64
   %half9 = bitcast %union.vector_t* %va to [8 x i16]*
   %arrayidx10 = getelementptr inbounds [8 x i16], [8 x i16]* %half9, i64 0, i64 %idxprom7
-  %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !0
+  %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !10
   %conv12 = zext i16 %tmp11 to i32
   %shl = shl i32 %conv12, %and
   %sub15 = sub nsw i32 7, %i.01
   %idxprom16 = sext i32 %sub15 to i64
   %half18 = bitcast %union.vector_t* %va to [8 x i16]*
   %arrayidx19 = getelementptr inbounds [8 x i16], [8 x i16]* %half18, i64 0, i64 %idxprom16
-  %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !0
+  %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !10
   %conv21 = zext i16 %tmp20 to i32
   %sub23 = sub nsw i32 16, %and
   %shr = lshr i32 %conv21, %sub23
@@ -50,20 +50,20 @@ for.body:                                         ; preds = %entry, %for.body
   %idxprom27 = sext i32 %sub26 to i64
   %half28 = bitcast %union.vector_t* %t to [8 x i16]*
   %arrayidx29 = getelementptr inbounds [8 x i16], [8 x i16]* %half28, i64 0, i64 %idxprom27
-  store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !0
+  store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !10
   %inc = add nsw i32 %i.01, 1
   %cmp = icmp slt i32 %inc, 8
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.body
   %arrayidx31 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 1
-  %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !3
+  %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !10
   %arrayidx35 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 1
-  store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !3
+  store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !10
   %arrayidx37 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 0
-  %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !3
+  %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !10
   %arrayidx41 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 0
-  store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !3
+  store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !10
   ret void
 }
 
@@ -124,7 +124,7 @@ for.end:                                          ; preds = %for.body
 }
 
 ; CHECK: [[TAG]] = !{[[TYPE_LL:!.*]], [[TYPE_LL]], i64 0}
-; CHECK: [[TYPE_LL]] = !{!"long long", {{!.*}}}
+; CHECK: [[TYPE_LL]] = !{!"omnipotent char", {{!.*}}}
 !0 = !{!6, !6, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
@@ -135,3 +135,4 @@ for.end:                                          ; preds = %for.body
 !7 = !{!"long long", !1}
 !8 = !{!"int", !1}
 !9 = !{!"float", !1}
+!10 = !{!1, !1, i64 0}