aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll139
-rw-r--r--test/Transforms/CallSiteSplitting/callsite-no-splitting.ll18
-rw-r--r--test/Transforms/CodeGenPrepare/section.ll47
-rw-r--r--test/Transforms/GVN/tbaa.ll61
-rw-r--r--test/Transforms/Inline/AArch64/binop.ll291
-rw-r--r--test/Transforms/Inline/ARM/inline-fp.ll113
-rw-r--r--test/Transforms/Inline/inline-fp.ll137
-rw-r--r--test/Transforms/Inline/redundant-loads.ll18
-rw-r--r--test/Transforms/InstCombine/2011-09-03-Trampoline.ll23
-rw-r--r--test/Transforms/JumpThreading/guards.ll103
-rw-r--r--test/Transforms/LoopVectorize/legal_preheader_check.ll27
-rw-r--r--test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll48
-rw-r--r--test/Transforms/MemCpyOpt/merge-into-memset.ll45
-rw-r--r--test/Transforms/MemCpyOpt/mixed-sizes.ll36
-rw-r--r--test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll114
-rw-r--r--test/Transforms/NewGVN/tbaa.ll61
-rw-r--r--test/Transforms/PGOProfile/icp_covariant_call_return.ll3
-rw-r--r--test/Transforms/PGOProfile/icp_covariant_invoke_return.ll9
-rw-r--r--test/Transforms/PGOProfile/icp_invoke.ll17
-rw-r--r--test/Transforms/PGOProfile/icp_invoke_nouse.ll3
-rw-r--r--test/Transforms/PGOProfile/icp_vararg.ll3
-rw-r--r--test/Transforms/PGOProfile/indirect_call_promotion.ll3
-rw-r--r--test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll24
-rw-r--r--test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll125
-rw-r--r--test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll225
-rw-r--r--test/Transforms/SLPVectorizer/X86/jumbled-load.ll37
-rw-r--r--test/Transforms/SLPVectorizer/X86/store-jumbled.ll25
-rw-r--r--test/Transforms/SampleProfile/entry_counts.ll4
-rw-r--r--test/Transforms/SimplifyCFG/X86/if-conversion.ll231
29 files changed, 1391 insertions, 599 deletions
diff --git a/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll b/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll
new file mode 100644
index 000000000000..e10b04a850af
--- /dev/null
+++ b/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll
@@ -0,0 +1,139 @@
+; RUN: opt < %s -callsite-splitting -S | FileCheck %s
+; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s
+
+; CHECK-LABEL: @test_simple
+; CHECK-LABEL: Header:
+; CHECK-NEXT: br i1 undef, label %Tail.predBB1.split
+; CHECK-LABEL: TBB:
+; CHECK: br i1 %cmp, label %Tail.predBB2.split
+; CHECK-LABEL: Tail.predBB1.split:
+; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
+; CHECK-LABEL: Tail.predBB2.split:
+; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p)
+; CHECK-LABEL: Tail
+; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ]
+; CHECK: ret i32 %[[MERGED]]
+define i32 @test_simple(i32* %a, i32 %v, i32 %p) {
+Header:
+ br i1 undef, label %Tail, label %End
+
+TBB:
+ %cmp = icmp eq i32* %a, null
+ br i1 %cmp, label %Tail, label %End
+
+Tail:
+ %r = call i32 @callee(i32* %a, i32 %v, i32 %p)
+ ret i32 %r
+
+End:
+ ret i32 %v
+}
+
+; CHECK-LABEL: @test_eq_eq_eq_untaken2
+; CHECK-LABEL: Header:
+; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB1.split
+; CHECK-LABEL: TBB2:
+; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End
+; CHECK-LABEL: Tail.predBB1.split:
+; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
+; CHECK-LABEL: Tail.predBB2.split:
+; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 99)
+; CHECK-LABEL: Tail
+; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ]
+; CHECK: ret i32 %[[MERGED]]
+define i32 @test_eq_eq_eq_untaken2(i32* %a, i32 %v, i32 %p) {
+Header:
+ %tobool1 = icmp eq i32* %a, null
+ br i1 %tobool1, label %TBB1, label %Tail
+
+TBB1:
+ %cmp1 = icmp eq i32 %v, 1
+ br i1 %cmp1, label %TBB2, label %End
+
+TBB2:
+ %cmp2 = icmp eq i32 %p, 99
+ br i1 %cmp2, label %Tail, label %End
+
+Tail:
+ %r = call i32 @callee(i32* %a, i32 %v, i32 %p)
+ ret i32 %r
+
+End:
+ ret i32 %v
+}
+
+; CHECK-LABEL: @test_eq_ne_eq_untaken
+; CHECK-LABEL: Header:
+; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB1.split
+; CHECK-LABEL: TBB2:
+; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End
+; CHECK-LABEL: Tail.predBB1.split:
+; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
+; CHECK-LABEL: Tail.predBB2.split:
+; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 99)
+; CHECK-LABEL: Tail
+; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ]
+; CHECK: ret i32 %[[MERGED]]
+define i32 @test_eq_ne_eq_untaken(i32* %a, i32 %v, i32 %p) {
+Header:
+ %tobool1 = icmp eq i32* %a, null
+ br i1 %tobool1, label %TBB1, label %Tail
+
+TBB1:
+ %cmp1 = icmp ne i32 %v, 1
+ br i1 %cmp1, label %TBB2, label %End
+
+TBB2:
+ %cmp2 = icmp eq i32 %p, 99
+ br i1 %cmp2, label %Tail, label %End
+
+Tail:
+ %r = call i32 @callee(i32* %a, i32 %v, i32 %p)
+ ret i32 %r
+
+End:
+ ret i32 %v
+}
+
+; CHECK-LABEL: @test_header_header2_tbb
+; CHECK: Header2:
+; CHECK: br i1 %tobool2, label %Tail.predBB1.split, label %TBB1
+; CHECK-LABEL: TBB2:
+; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End
+; CHECK-LABEL: Tail.predBB1.split:
+; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
+; CHECK-LABEL: Tail.predBB2.split:
+; NOTE: CallSiteSplitting cannot infer that %a is null here, as it currently
+; only supports recording conditions along a single predecessor path.
+; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 99)
+; CHECK-LABEL: Tail
+; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ]
+; CHECK: ret i32 %[[MERGED]]
+define i32 @test_header_header2_tbb(i32* %a, i32 %v, i32 %p) {
+Header:
+ %tobool1 = icmp eq i32* %a, null
+ br i1 %tobool1, label %TBB1, label %Header2
+
+Header2:
+ %tobool2 = icmp eq i32 %p, 10
+ br i1 %tobool2, label %Tail, label %TBB1
+
+TBB1:
+ %cmp1 = icmp eq i32 %v, 1
+ br i1 %cmp1, label %TBB2, label %End
+
+TBB2:
+ %cmp2 = icmp eq i32 %p, 99
+ br i1 %cmp2, label %Tail, label %End
+
+Tail:
+ %r = call i32 @callee(i32* %a, i32 %v, i32 %p)
+ ret i32 %r
+
+End:
+ ret i32 %v
+}
+
+define i32 @callee(i32* %a, i32 %v, i32 %p) {
+ ret i32 10
+}
diff --git a/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll b/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll
new file mode 100644
index 000000000000..ca41bd6fc5e1
--- /dev/null
+++ b/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -callsite-splitting -S | FileCheck %s
+; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s
+
+define i32 @callee(i32*, i32, i32) {
+ ret i32 10
+}
+
+; CHECK-LABEL: @test_preds_equal
+; CHECK-NOT: split
+; CHECK: br i1 %cmp, label %Tail, label %Tail
+define i32 @test_preds_equal(i32* %a, i32 %v, i32 %p) {
+TBB:
+ %cmp = icmp eq i32* %a, null
+ br i1 %cmp, label %Tail, label %Tail
+Tail:
+ %r = call i32 @callee(i32* %a, i32 %v, i32 %p)
+ ret i32 %r
+}
diff --git a/test/Transforms/CodeGenPrepare/section.ll b/test/Transforms/CodeGenPrepare/section.ll
index 4f3144e7fc73..30598ba7afbe 100644
--- a/test/Transforms/CodeGenPrepare/section.ll
+++ b/test/Transforms/CodeGenPrepare/section.ll
@@ -4,33 +4,59 @@ target triple = "x86_64-pc-linux-gnu"
; This tests that hot/cold functions get correct section prefix assigned
-; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; CHECK: hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; The entry is hot
-define void @hot_func() !prof !15 {
+define void @hot_func1() !prof !15 {
ret void
}
-; For instrumentation based PGO, we should only look at entry counts,
+; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; Entry is cold but inner block is hot
+define void @hot_func2(i32 %n) !prof !16 {
+entry:
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %n, i32* %n.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond:
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end, !prof !19
+
+for.body:
+ %2 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %2, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+; For instrumentation based PGO, we should only look at block counts,
; not call site VP metadata (which can exist on value profiled memcpy,
; or possibly left behind after static analysis based devirtualization).
; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
define void @cold_func1() !prof !16 {
- call void @hot_func(), !prof !17
- call void @hot_func(), !prof !17
+ call void @hot_func1(), !prof !17
+ call void @hot_func1(), !prof !17
ret void
}
-; CHECK: cold_func2{{.*}}!section_prefix
+; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]]
define void @cold_func2() !prof !16 {
- call void @hot_func(), !prof !17
- call void @hot_func(), !prof !18
- call void @hot_func(), !prof !18
+ call void @hot_func1(), !prof !17
+ call void @hot_func1(), !prof !18
+ call void @hot_func1(), !prof !18
ret void
}
; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
define void @cold_func3() !prof !16 {
- call void @hot_func(), !prof !18
+ call void @hot_func1(), !prof !18
ret void
}
@@ -55,3 +81,4 @@ define void @cold_func3() !prof !16 {
!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}
+!19 = !{!"branch_weights", i32 1000, i32 1}
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index 7c05fda6cb8f..5cb4e0359970 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -1,7 +1,7 @@
; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
define i32 @test1(i8* %p, i8* %q) {
-; CHECK: @test1(i8* %p, i8* %q)
+; CHECK-LABEL: @test1(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p)
; CHECK-NOT: tbaa
; CHECK: %c = add i32 %a, %a
@@ -12,7 +12,7 @@ define i32 @test1(i8* %p, i8* %q) {
}
define i32 @test2(i8* %p, i8* %q) {
-; CHECK: @test2(i8* %p, i8* %q)
+; CHECK-LABEL: @test2(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !0
@@ -22,7 +22,7 @@ define i32 @test2(i8* %p, i8* %q) {
}
define i32 @test3(i8* %p, i8* %q) {
-; CHECK: @test3(i8* %p, i8* %q)
+; CHECK-LABEL: @test3(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !3
@@ -32,7 +32,7 @@ define i32 @test3(i8* %p, i8* %q) {
}
define i32 @test4(i8* %p, i8* %q) {
-; CHECK: @test4(i8* %p, i8* %q)
+; CHECK-LABEL: @test4(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !1
@@ -42,8 +42,8 @@ define i32 @test4(i8* %p, i8* %q) {
}
define i32 @test5(i8* %p, i8* %q) {
-; CHECK: @test5(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
+; CHECK-LABEL: @test5(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !0
%b = call i32 @foo(i8* %p), !tbaa !1
@@ -52,8 +52,8 @@ define i32 @test5(i8* %p, i8* %q) {
}
define i32 @test6(i8* %p, i8* %q) {
-; CHECK: @test6(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
+; CHECK-LABEL: @test6(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !0
%b = call i32 @foo(i8* %p), !tbaa !3
@@ -62,7 +62,7 @@ define i32 @test6(i8* %p, i8* %q) {
}
define i32 @test7(i8* %p, i8* %q) {
-; CHECK: @test7(i8* %p, i8* %q)
+; CHECK-LABEL: @test7(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p)
; CHECK-NOT: tbaa
; CHECK: %c = add i32 %a, %a
@@ -72,10 +72,8 @@ define i32 @test7(i8* %p, i8* %q) {
ret i32 %c
}
-
-
define i32 @test8(i32* %p, i32* %q) {
-; CHECK-LABEL: test8
+; CHECK-LABEL: @test8
; CHECK-NEXT: store i32 15, i32* %p
; CHECK-NEXT: ret i32 0
; Since we know the location is invariant, we can forward the
@@ -87,8 +85,9 @@ define i32 @test8(i32* %p, i32* %q) {
%c = sub i32 %a, %b
ret i32 %c
}
+
define i32 @test9(i32* %p, i32* %q) {
-; CHECK-LABEL: test9
+; CHECK-LABEL: @test9
; CHECK-NEXT: call void @clobber()
; CHECK-NEXT: ret i32 0
; Since we know the location is invariant, we can forward the
@@ -101,16 +100,27 @@ define i32 @test9(i32* %p, i32* %q) {
ret i32 %c
}
+define i32 @test10(i8* %p, i8* %q) {
+; If one access encloses the other, then the merged access is the enclosed one
+; and not just the common final access type.
+; CHECK-LABEL: @test10
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAG_X_i:!.*]]
+; CHECK: %c = add i32 %a, %a
+ %a = call i32 @foo(i8* %p), !tbaa !15 ; TAG_X_i
+ %b = call i32 @foo(i8* %p), !tbaa !19 ; TAG_Y_x_i
+ %c = add i32 %a, %b
+ ret i32 %c
+}
declare void @clobber()
declare i32 @foo(i8*) readonly
-; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
-; CHECK: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]}
-; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
-; CHECK: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0}
-; CHECK: [[TYPEB]] = !{!"B", [[TYPEA]]}
-; CHECK: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0}
+; CHECK-DAG: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
+; CHECK-DAG: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]}
+; CHECK-DAG: [[TYPEA]] = !{!"A", !{{.*}}}
+; CHECK-DAG: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0}
+; CHECK-DAG: [[TYPEB]] = !{!"B", [[TYPEA]]}
+; CHECK-DAG: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0}
!0 = !{!5, !5, i64 0}
!1 = !{!6, !6, i64 0}
!2 = !{!"tbaa root"}
@@ -122,8 +132,17 @@ declare i32 @foo(i8*) readonly
!8 = !{!"another root"}
!11 = !{!"scalar type", !8}
+; CHECK-DAG: [[TAG_X_i]] = !{[[TYPE_X:!.*]], [[TYPE_int:!.*]], i64 0}
+; CHECK-DAG: [[TYPE_X:!.*]] = !{!"struct X", [[TYPE_int]], i64 0}
+; CHECK-DAG: [[TYPE_int]] = !{!"int", {{!.*}}, i64 0}
+!15 = !{!16, !17, i64 0} ; TAG_X_i
+!16 = !{!"struct X", !17, i64 0} ; struct X { int i; };
+!17 = !{!"int", !18, i64 0}
+!18 = !{!"char", !2, i64 0}
-;; A TBAA structure who's only point is to have a constant location
+!19 = !{!20, !17, i64 0} ; TAG_Y_x_i
+!20 = !{!"struct Y", !16, i64 0} ; struct Y { struct X x; };
+
+; A TBAA structure whose only point is to have a constant location.
!9 = !{!"yet another root"}
!10 = !{!"node", !9, i64 1}
-
diff --git a/test/Transforms/Inline/AArch64/binop.ll b/test/Transforms/Inline/AArch64/binop.ll
new file mode 100644
index 000000000000..051528991e46
--- /dev/null
+++ b/test/Transforms/Inline/AArch64/binop.ll
@@ -0,0 +1,291 @@
+; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare void @pad()
+@glbl = external global i32
+
+define i32 @outer_add1(i32 %a) {
+; CHECK-LABEL: @outer_add1(
+; CHECK-NOT: call i32 @add
+ %C = call i32 @add(i32 %a, i32 0)
+ ret i32 %C
+}
+
+define i32 @outer_add2(i32 %a) {
+; CHECK-LABEL: @outer_add2(
+; CHECK-NOT: call i32 @add
+ %C = call i32 @add(i32 0, i32 %a)
+ ret i32 %C
+}
+
+define i32 @add(i32 %a, i32 %b) {
+ %add = add i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %add
+}
+
+
+
+define i32 @outer_sub1(i32 %a) {
+; CHECK-LABEL: @outer_sub1(
+; CHECK-NOT: call i32 @sub1
+ %C = call i32 @sub1(i32 %a, i32 0)
+ ret i32 %C
+}
+
+define i32 @sub1(i32 %a, i32 %b) {
+ %sub = sub i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %sub
+}
+
+
+define i32 @outer_sub2(i32 %a) {
+; CHECK-LABEL: @outer_sub2(
+; CHECK-NOT: call i32 @sub2
+ %C = call i32 @sub2(i32 %a)
+ ret i32 %C
+}
+
+define i32 @sub2(i32 %a) {
+ %sub = sub i32 %a, %a
+ call void @pad()
+ ret i32 %sub
+}
+
+
+
+define i32 @outer_mul1(i32 %a) {
+; CHECK-LABEL: @outer_mul1(
+; CHECK-NOT: call i32 @mul
+ %C = call i32 @mul(i32 %a, i32 0)
+ ret i32 %C
+}
+
+define i32 @outer_mul2(i32 %a) {
+; CHECK-LABEL: @outer_mul2(
+; CHECK-NOT: call i32 @mul
+ %C = call i32 @mul(i32 %a, i32 1)
+ ret i32 %C
+}
+
+define i32 @mul(i32 %a, i32 %b) {
+ %mul = mul i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %mul
+}
+
+
+
+define i32 @outer_div1(i32 %a) {
+; CHECK-LABEL: @outer_div1(
+; CHECK-NOT: call i32 @div1
+ %C = call i32 @div1(i32 0, i32 %a)
+ ret i32 %C
+}
+
+define i32 @outer_div2(i32 %a) {
+; CHECK-LABEL: @outer_div2(
+; CHECK-NOT: call i32 @div1
+ %C = call i32 @div1(i32 %a, i32 1)
+ ret i32 %C
+}
+
+define i32 @div1(i32 %a, i32 %b) {
+ %div = sdiv i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %div
+}
+
+
+define i32 @outer_div3(i32 %a) {
+; CHECK-LABEL: @outer_div3(
+; CHECK-NOT: call i32 @div
+ %C = call i32 @div2(i32 %a)
+ ret i32 %C
+}
+
+define i32 @div2(i32 %a) {
+ %div = sdiv i32 %a, %a
+ call void @pad()
+ ret i32 %div
+}
+
+
+
+define i32 @outer_rem1(i32 %a) {
+; CHECK-LABEL: @outer_rem1(
+; CHECK-NOT: call i32 @rem
+ %C = call i32 @rem1(i32 0, i32 %a)
+ ret i32 %C
+}
+
+define i32 @outer_rem2(i32 %a) {
+; CHECK-LABEL: @outer_rem2(
+; CHECK-NOT: call i32 @rem
+ %C = call i32 @rem1(i32 %a, i32 1)
+ ret i32 %C
+}
+
+define i32 @rem1(i32 %a, i32 %b) {
+ %rem = urem i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %rem
+}
+
+
+define i32 @outer_rem3(i32 %a) {
+; CHECK-LABEL: @outer_rem3(
+; CHECK-NOT: call i32 @rem
+ %C = call i32 @rem2(i32 %a)
+ ret i32 %C
+}
+
+define i32 @rem2(i32 %a) {
+ %rem = urem i32 %a, %a
+ call void @pad()
+ ret i32 %rem
+}
+
+
+
+define i32 @outer_shl1(i32 %a) {
+; CHECK-LABEL: @outer_shl1(
+; CHECK-NOT: call i32 @shl
+ %C = call i32 @shl(i32 %a, i32 0)
+ ret i32 %C
+}
+
+define i32 @shl(i32 %a, i32 %b) {
+ %shl = shl i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %shl
+}
+
+
+
+define i32 @outer_shr1(i32 %a) {
+; CHECK-LABEL: @outer_shr1(
+; CHECK-NOT: call i32 @shr
+ %C = call i32 @shr(i32 %a, i32 0)
+ ret i32 %C
+}
+
+define i32 @shr(i32 %a, i32 %b) {
+ %shr = ashr i32 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i32 %shr
+}
+
+
+
+define i1 @outer_and1(i1 %a) {
+; CHECK-LABEL: @outer_and1(
+; CHECK-NOT: call i1 @and1
+ %c = call i1 @and1(i1 %a, i1 false)
+ ret i1 %c
+}
+
+define i1 @outer_and2(i1 %a) {
+; CHECK-LABEL: @outer_and2(
+; CHECK-NOT: call i1 @and1
+ %c = call i1 @and1(i1 %a, i1 true)
+ ret i1 %c
+}
+
+define i1 @and1(i1 %a, i1 %b) {
+ %and = and i1 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i1 %and
+}
+
+
+define i1 @outer_and3(i1 %a) {
+; CHECK-LABEL: @outer_and3(
+; CHECK-NOT: call i1 @and2
+ %c = call i1 @and2(i1 %a)
+ ret i1 %c
+}
+
+define i1 @and2(i1 %a) {
+ %and = and i1 %a, %a
+ call void @pad()
+ ret i1 %and
+}
+
+
+
+define i1 @outer_or1(i1 %a) {
+; CHECK-LABEL: @outer_or1(
+; CHECK-NOT: call i1 @or1
+ %c = call i1 @or1(i1 %a, i1 false)
+ ret i1 %c
+}
+
+define i1 @outer_or2(i1 %a) {
+; CHECK-LABEL: @outer_or2(
+; CHECK-NOT: call i1 @or1
+ %c = call i1 @or1(i1 %a, i1 true)
+ ret i1 %c
+}
+
+define i1 @or1(i1 %a, i1 %b) {
+ %or = or i1 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i1 %or
+}
+
+
+define i1 @outer_or3(i1 %a) {
+; CHECK-LABEL: @outer_or3(
+; CHECK-NOT: call i1 @or2
+ %c = call i1 @or2(i1 %a)
+ ret i1 %c
+}
+
+define i1 @or2(i1 %a) {
+ %or = or i1 %a, %a
+ call void @pad()
+ ret i1 %or
+}
+
+
+
+define i1 @outer_xor1(i1 %a) {
+; CHECK-LABEL: @outer_xor1(
+; CHECK-NOT: call i1 @xor
+ %c = call i1 @xor1(i1 %a, i1 false)
+ ret i1 %c
+}
+
+define i1 @xor1(i1 %a, i1 %b) {
+ %xor = xor i1 %a, %b
+ call void @pad()
+ store i32 0, i32* @glbl
+ ret i1 %xor
+}
+
+
+define i1 @outer_xor3(i1 %a) {
+; CHECK-LABEL: @outer_xor3(
+; CHECK-NOT: call i1 @xor
+ %c = call i1 @xor2(i1 %a)
+ ret i1 %c
+}
+
+define i1 @xor2(i1 %a) {
+ %xor = xor i1 %a, %a
+ call void @pad()
+ ret i1 %xor
+}
diff --git a/test/Transforms/Inline/ARM/inline-fp.ll b/test/Transforms/Inline/ARM/inline-fp.ll
new file mode 100644
index 000000000000..b4e76dfc7d2d
--- /dev/null
+++ b/test/Transforms/Inline/ARM/inline-fp.ll
@@ -0,0 +1,113 @@
+; RUN: opt -S -inline -mtriple=arm-eabi -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=NOFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=FULLFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2,+fp-only-sp -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP
+; Make sure that soft float implementations are calculated as being more expensive
+; to the inliner.
+
+; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+; FULLFP-DAG: single inlined into test_single with cost=0 (threshold=75)
+; FULLFP-DAG: single inlined into test_single with cost=-15000 (threshold=75)
+; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; FULLFP-DAG: double inlined into test_double with cost=0 (threshold=75)
+; FULLFP-DAG: double inlined into test_double with cost=-15000 (threshold=75)
+; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+; SINGLEFP-DAG: single inlined into test_single with cost=0 (threshold=75)
+; SINGLEFP-DAG: single inlined into test_single with cost=-15000 (threshold=75)
+; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+ %call = call float @single(i32 %a, i8 zeroext %b)
+ %call2 = call float @single(i32 %c, i8 zeroext %d)
+ ret i32 0
+}
+
+define i32 @test_single_cheap(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+ %call = call float @single_cheap(i32 %a, i8 zeroext %b)
+ %call2 = call float @single_cheap(i32 %c, i8 zeroext %d)
+ ret i32 0
+}
+
+define i32 @test_double(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+ %call = call double @double(i32 %a, i8 zeroext %b)
+ %call2 = call double @double(i32 %c, i8 zeroext %d)
+ ret i32 0
+}
+
+define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
+ %call = call float @single_force_soft(i32 %a, i8 zeroext %b) #1
+ %call2 = call float @single_force_soft(i32 %c, i8 zeroext %d) #1
+ ret i32 0
+}
+
+define internal float @single(i32 %response, i8 zeroext %value1) #0 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = sitofp i32 %sub to float
+ %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+ %mul = fmul float %0, 2.620000e+03
+ %conv2 = sitofp i32 %response to float
+ %sub3 = fsub float %conv2, %mul
+ %div = fdiv float %sub3, %mul
+ ret float %div
+}
+
+define internal float @single_cheap(i32 %response, i8 zeroext %value1) #0 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = bitcast i32 %sub to float
+ %conv2 = bitcast i32 %response to float
+ %0 = tail call float @llvm.pow.f32(float %conv2, float %conv1)
+ %1 = tail call float @llvm.pow.f32(float %0, float %0)
+ %2 = tail call float @llvm.pow.f32(float %1, float %1)
+ ret float %2
+}
+
+define internal double @double(i32 %response, i8 zeroext %value1) #0 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = sitofp i32 %sub to double
+ %0 = tail call double @llvm.pow.f64(double 0x3FF028F5C0000000, double %conv1)
+ %mul = fmul double %0, 2.620000e+03
+ %conv2 = sitofp i32 %response to double
+ %sub3 = fsub double %conv2, %mul
+ %div = fdiv double %sub3, %mul
+ ret double %div
+}
+
+define internal float @single_force_soft(i32 %response, i8 zeroext %value1) #1 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = sitofp i32 %sub to float
+ %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+ %mul = fmul float %0, 2.620000e+03
+ %conv2 = sitofp i32 %response to float
+ %sub3 = fsub float %conv2, %mul
+ %div = fdiv float %sub3, %mul
+ ret float %div
+}
+
+declare float @llvm.pow.f32(float, float) optsize minsize
+declare double @llvm.pow.f64(double, double) optsize minsize
+
+attributes #0 = { optsize }
+attributes #1 = { optsize "use-soft-float"="true" "target-features"="+soft-float" }
diff --git a/test/Transforms/Inline/inline-fp.ll b/test/Transforms/Inline/inline-fp.ll
deleted file mode 100644
index dd5972fe1b8a..000000000000
--- a/test/Transforms/Inline/inline-fp.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; RUN: opt -S -inline < %s | FileCheck %s
-; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s
-; Make sure that soft float implementations are calculated as being more expensive
-; to the inliner.
-
-define i32 @test_nofp() #0 {
-; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
-; CHECK-LABEL: test_nofp
-; CHECK: call float @f_nofp
-entry:
- %responseX = alloca i32, align 4
- %responseY = alloca i32, align 4
- %responseZ = alloca i32, align 4
- %valueX = alloca i8, align 1
- %valueY = alloca i8, align 1
- %valueZ = alloca i8, align 1
-
- call void @getX(i32* %responseX, i8* %valueX)
- call void @getY(i32* %responseY, i8* %valueY)
- call void @getZ(i32* %responseZ, i8* %valueZ)
-
- %0 = load i32, i32* %responseX
- %1 = load i8, i8* %valueX
- %call = call float @f_nofp(i32 %0, i8 zeroext %1)
- %2 = load i32, i32* %responseZ
- %3 = load i8, i8* %valueZ
- %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
- %call3 = call float @fabsf(float %call)
- %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
- br i1 %cmp, label %if.end12, label %if.else
-
-if.else: ; preds = %entry
- %4 = load i32, i32* %responseY
- %5 = load i8, i8* %valueY
- %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
- %call4 = call float @fabsf(float %call1)
- %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
- br i1 %cmp5, label %if.end12, label %if.else7
-
-if.else7: ; preds = %if.else
- %call8 = call float @fabsf(float %call2)
- %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
- br i1 %cmp9, label %if.then10, label %if.end12
-
-if.then10: ; preds = %if.else7
- br label %if.end12
-
-if.end12: ; preds = %if.else, %entry, %if.then10, %if.else7
- %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
- ret i32 %success.0
-}
-
-define i32 @test_hasfp() #0 {
-; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined.
-; CHECK-LABEL: test_hasfp
-; CHECK-NOT: call float @f_hasfp
-entry:
- %responseX = alloca i32, align 4
- %responseY = alloca i32, align 4
- %responseZ = alloca i32, align 4
- %valueX = alloca i8, align 1
- %valueY = alloca i8, align 1
- %valueZ = alloca i8, align 1
-
- call void @getX(i32* %responseX, i8* %valueX)
- call void @getY(i32* %responseY, i8* %valueY)
- call void @getZ(i32* %responseZ, i8* %valueZ)
-
- %0 = load i32, i32* %responseX
- %1 = load i8, i8* %valueX
- %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
- %2 = load i32, i32* %responseZ
- %3 = load i8, i8* %valueZ
- %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
- %call3 = call float @fabsf(float %call)
- %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
- br i1 %cmp, label %if.end12, label %if.else
-
-if.else: ; preds = %entry
- %4 = load i32, i32* %responseY
- %5 = load i8, i8* %valueY
- %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
- %call4 = call float @fabsf(float %call1)
- %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
- br i1 %cmp5, label %if.end12, label %if.else7
-
-if.else7: ; preds = %if.else
- %call8 = call float @fabsf(float %call2)
- %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
- br i1 %cmp9, label %if.then10, label %if.end12
-
-if.then10: ; preds = %if.else7
- br label %if.end12
-
-if.end12: ; preds = %if.else, %entry, %if.then10, %if.else7
- %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
- ret i32 %success.0
-}
-
-declare void @getX(i32*, i8*) #0
-
-declare void @getY(i32*, i8*) #0
-
-declare void @getZ(i32*, i8*) #0
-
-define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 {
-entry:
- %conv = zext i8 %value1 to i32
- %sub = add nsw i32 %conv, -1
- %conv1 = sitofp i32 %sub to float
- %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
- %mul = fmul float %0, 2.620000e+03
- %conv2 = sitofp i32 %response to float
- %sub3 = fsub float %conv2, %mul
- %div = fdiv float %sub3, %mul
- ret float %div
-}
-
-define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 {
-entry:
- %conv = zext i8 %value1 to i32
- %sub = add nsw i32 %conv, -1
- %conv1 = sitofp i32 %sub to float
- %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
- %mul = fmul float %0, 2.620000e+03
- %conv2 = sitofp i32 %response to float
- %sub3 = fsub float %conv2, %mul
- %div = fdiv float %sub3, %mul
- ret float %div
-}
-
-declare float @fabsf(float) optsize minsize
-
-declare float @llvm.pow.f32(float, float) optsize minsize
-
-attributes #0 = { optsize }
-attributes #1 = { optsize "use-soft-float"="true" }
diff --git a/test/Transforms/Inline/redundant-loads.ll b/test/Transforms/Inline/redundant-loads.ll
index 6b89f1db484b..176f605fc73b 100644
--- a/test/Transforms/Inline/redundant-loads.ll
+++ b/test/Transforms/Inline/redundant-loads.ll
@@ -184,3 +184,21 @@ define void @inner9(i32* %a, void ()* %f) {
call void @pad()
ret void
}
+
+
+define void @outer10(i32* %a) {
+; CHECK-LABEL: @outer10(
+; CHECK: call void @inner10
+ %b = alloca i32
+ call void @inner10(i32* %a, i32* %b)
+ ret void
+}
+
+define void @inner10(i32* %a, i32* %b) {
+ %1 = load i32, i32* %a
+ store i32 %1, i32 * %b
+ %2 = load volatile i32, i32* %a ; volatile load should be kept.
+ call void @pad()
+ %3 = load volatile i32, i32* %a ; Same as the above.
+ ret void
+}
diff --git a/test/Transforms/InstCombine/2011-09-03-Trampoline.ll b/test/Transforms/InstCombine/2011-09-03-Trampoline.ll
index 1833558cbceb..7a315094a04e 100644
--- a/test/Transforms/InstCombine/2011-09-03-Trampoline.ll
+++ b/test/Transforms/InstCombine/2011-09-03-Trampoline.ll
@@ -5,18 +5,18 @@ declare i8* @llvm.adjust.trampoline(i8*)
declare i32 @f(i8 * nest, i32)
; Most common case
-define i32 @test0(i32 %n) {
+define i32 @test0(i32 %n) !dbg !4 {
%alloca = alloca [10 x i8], align 16
%gep = getelementptr [10 x i8], [10 x i8]* %alloca, i32 0, i32 0
call void @llvm.init.trampoline(i8* %gep, i8* bitcast (i32 (i8*, i32)* @f to i8*),
i8* null)
%tramp = call i8* @llvm.adjust.trampoline(i8* %gep)
%function = bitcast i8* %tramp to i32(i32)*
- %ret = call i32 %function(i32 %n)
+ %ret = call i32 %function(i32 %n), !dbg !10
ret i32 %ret
-; CHECK: define i32 @test0(i32 %n) {
-; CHECK: %ret = call i32 @f(i8* nest null, i32 %n)
+; CHECK: define i32 @test0(i32 %n) !dbg !4 {
+; CHECK: %ret = call i32 @f(i8* nest null, i32 %n), !dbg !10
}
define i32 @test1(i32 %n, i8* %trampmem) {
@@ -85,3 +85,18 @@ define i32 @test4(i32 %n) {
; CHECK: %ret1 = call i32 @f(i8* nest null, i32 %n)
; CHECK: %ret2 = call i32 @f(i8* nest null, i32 %n)
}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2)
+!1 = !DIFile(filename: "string.h", directory: "Game")
+!2 = !{}
+!3 = !{i32 1, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "passthru", scope: !1, file: !1, line: 79, type: !5, isLocal: true, isDefinition: true, scopeLine: 79, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, scope: !0, baseType: null, size: 64, align: 64)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 78, type: !7)
+!10 = !DILocation(line: 78, column: 28, scope: !4)
diff --git a/test/Transforms/JumpThreading/guards.ll b/test/Transforms/JumpThreading/guards.ll
index 53175a7b7253..c760283f9e52 100644
--- a/test/Transforms/JumpThreading/guards.ll
+++ b/test/Transforms/JumpThreading/guards.ll
@@ -278,3 +278,106 @@ L2:
L3:
ret void
}
+
+; Make sure that we don't PRE a non-speculable load across a guard.
+define void @unsafe_pre_across_guard(i8* %p, i1 %load.is.valid) {
+
+; CHECK-LABEL: @unsafe_pre_across_guard(
+; CHECK-NOT: loaded.pr
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ]
+; CHECK-NEXT: %loaded = load i8, i8* %p
+; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
+; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ]
+ %loaded = load i8, i8* %p
+ %continue = icmp eq i8 %loaded, 0
+ br i1 %continue, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+; Make sure that we can safely PRE a speculable load across a guard.
+define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) {
+
+; CHECK-LABEL: @safe_pre_across_guard(
+; CHECK: entry:
+; CHECK-NEXT: %loaded.pr = load i8, i8* %p
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %loaded = phi i8 [ %loaded, %loop ], [ %loaded.pr, %entry ]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ]
+; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
+; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ]
+ %loaded = load i8, i8* %p
+ %continue = icmp eq i8 %loaded, 0
+ br i1 %continue, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+; Make sure that we don't PRE a non-speculable load across a call which may
+; alias with the load.
+define void @unsafe_pre_across_call(i8* %p) {
+
+; CHECK-LABEL: @unsafe_pre_across_call(
+; CHECK-NOT: loaded.pr
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: call i32 @f1()
+; CHECK-NEXT: %loaded = load i8, i8* %p
+; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
+; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call i32 @f1()
+ %loaded = load i8, i8* %p
+ %continue = icmp eq i8 %loaded, 0
+ br i1 %continue, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+; Make sure that we can safely PRE a speculable load across a call.
+define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) {
+
+; CHECK-LABEL: @safe_pre_across_call(
+; CHECK: entry:
+; CHECK-NEXT: %loaded.pr = load i8, i8* %p
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %loaded = phi i8 [ %loaded, %loop ], [ %loaded.pr, %entry ]
+; CHECK-NEXT: call i32 @f1()
+; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
+; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call i32 @f1()
+ %loaded = load i8, i8* %p
+ %continue = icmp eq i8 %loaded, 0
+ br i1 %continue, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/legal_preheader_check.ll b/test/Transforms/LoopVectorize/legal_preheader_check.ll
new file mode 100644
index 000000000000..32aa796394d6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/legal_preheader_check.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -debug -S -o /dev/null 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; D40973
+; Make sure LV legal bails out when the loop doesn't have a legal pre-header.
+
+; CHECK: LV: Loop doesn't have a legal pre-header.
+
+define void @inc(i32 %n, i8* %P) {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %BB1, label %BB2
+
+BB1:
+ indirectbr i8* %P, [label %.lr.ph]
+
+BB2:
+ br label %.lr.ph
+
+.lr.ph:
+ %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %BB1 ], [ 0, %BB2 ]
+ %indvars.iv.next = add i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
new file mode 100644
index 000000000000..e3d1f6dd2b17
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; Test memcpy-memcpy dependencies across invoke edges.
+
+; Test that memcpyopt works across the non-unwind edge of an invoke.
+
+define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %temp = alloca i8, i32 64
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
+ invoke void @invoke_me()
+ to label %try.cont unwind label %lpad
+
+lpad:
+ landingpad { i8*, i32 }
+ catch i8* null
+ ret void
+
+try.cont:
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
+ ret void
+}
+
+; Test that memcpyopt works across the unwind edge of an invoke.
+
+define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %temp = alloca i8, i32 64
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
+ invoke void @invoke_me()
+ to label %try.cont unwind label %lpad
+
+lpad:
+ landingpad { i8*, i32 }
+ catch i8* null
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
+ ret void
+
+try.cont:
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare i32 @__gxx_personality_v0(...)
+declare void @invoke_me() readnone
diff --git a/test/Transforms/MemCpyOpt/merge-into-memset.ll b/test/Transforms/MemCpyOpt/merge-into-memset.ll
new file mode 100644
index 000000000000..fc31038a4e6d
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/merge-into-memset.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; Update cached non-local dependence information when merging stores into memset.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Don't delete the memcpy in %if.then, even though it depends on an instruction
+; which will be deleted.
+
+; CHECK-LABEL: @foo
+define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) {
+entry:
+ %tmp = alloca [50 x i8], align 8
+ %tmp4 = bitcast [50 x i8]* %tmp to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1
+ call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5
+ store i8 0, i8* %tmp4, align 8, !dbg !5
+; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i32 1, i1 false)
+ br i1 %c, label %if.then, label %exit
+
+if.then:
+; CHECK: if.then:
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false)
+ br label %exit
+
+exit:
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.rs", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !DILocation(line: 8, column: 5, scope: !6)
+!6 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
diff --git a/test/Transforms/MemCpyOpt/mixed-sizes.ll b/test/Transforms/MemCpyOpt/mixed-sizes.ll
new file mode 100644
index 000000000000..9091fe7f56c0
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/mixed-sizes.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; Handle memcpy-memcpy dependencies of differing sizes correctly.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Don't delete the second memcpy, even though there's an earlier
+; memcpy with a larger size from the same address.
+
+; CHECK-LABEL: @foo
+define i32 @foo(i1 %z) {
+entry:
+ %a = alloca [10 x i32]
+ %s = alloca [10 x i32]
+ %0 = bitcast [10 x i32]* %a to i8*
+ %1 = bitcast [10 x i32]* %s to i8*
+ call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false)
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0
+ store i32 1, i32* %arrayidx
+ %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1
+ %scevgep7 = bitcast i32* %scevgep to i8*
+ br i1 %z, label %for.body3.lr.ph, label %for.inc7.1
+
+for.body3.lr.ph: ; preds = %entry
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false)
+ br label %for.inc7.1
+
+for.inc7.1:
+; CHECK: for.inc7.1:
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
+ %2 = load i32, i32* %arrayidx
+ ret i32 %2
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)
diff --git a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
new file mode 100644
index 000000000000..5b0510211d9f
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; Make sure memcpy-memcpy dependence is optimized across
+; basic blocks (conditional branches and invokes).
+
+%struct.s = type { i32, i32 }
+
+@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
+@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
+@i = external constant i8*
+
+declare void @qux()
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare void @__cxa_throw(i8*, i8*, i8*)
+declare i32 @__gxx_personality_v0(...)
+declare i8* @__cxa_begin_catch(i8*)
+
+; A simple partial redundancy. Test that the second memcpy is optimized
+; to copy directly from the original source rather than from the temporary.
+
+; CHECK-LABEL: @wobble
+define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) {
+bb:
+ %temp = alloca i8, i32 64
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
+ br i1 %some_condition, label %more, label %out
+
+out:
+ call void @qux()
+ unreachable
+
+more:
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
+ ret void
+}
+
+; A CFG triangle with a partial redundancy targeting an alloca. Test that the
+; memcpy inside the triangle is optimized to copy directly from the original
+; source rather than from the temporary.
+
+; CHECK-LABEL: @foo
+define i32 @foo(i1 %t3) {
+bb:
+ %s = alloca %struct.s, align 4
+ %t = alloca %struct.s, align 4
+ %s1 = bitcast %struct.s* %s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
+ br i1 %t3, label %bb4, label %bb7
+
+bb4: ; preds = %bb
+ %t5 = bitcast %struct.s* %t to i8*
+ %s6 = bitcast %struct.s* %s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* %s6, i64 8, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
+ br label %bb7
+
+bb7: ; preds = %bb4, %bb
+ %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0
+ %t9 = load i32, i32* %t8, align 4
+ %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1
+ %t11 = load i32, i32* %t10, align 4
+ %t12 = add i32 %t9, %t11
+ ret i32 %t12
+}
+
+; A CFG diamond with an invoke on one side, and a partially redundant memcpy
+; into an alloca on the other. Test that the memcpy inside the diamond is
+; optimized to copy directly from the original source rather than from the
+; temporary. This more complex test represents a relatively common usage
+; pattern.
+
+; CHECK-LABEL: @baz
+define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+bb:
+ %s = alloca %struct.s, align 4
+ %t = alloca %struct.s, align 4
+ %s3 = bitcast %struct.s* %s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
+ br i1 %t5, label %bb6, label %bb22
+
+bb6: ; preds = %bb
+ invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
+ to label %bb25 unwind label %bb9
+
+bb9: ; preds = %bb6
+ %t10 = landingpad { i8*, i32 }
+ catch i8* null
+ br label %bb13
+
+bb13: ; preds = %bb9
+ %t15 = call i8* @__cxa_begin_catch(i8* null)
+ br label %bb23
+
+bb22: ; preds = %bb
+ %t23 = bitcast %struct.s* %t to i8*
+ %s24 = bitcast %struct.s* %s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* %s24, i64 8, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
+ br label %bb23
+
+bb23: ; preds = %bb22, %bb13
+ %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0
+ %t18 = load i32, i32* %t17, align 4
+ %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1
+ %t20 = load i32, i32* %t19, align 4
+ %t21 = add nsw i32 %t18, %t20
+ ret i32 %t21
+
+bb25: ; preds = %bb6
+ unreachable
+}
diff --git a/test/Transforms/NewGVN/tbaa.ll b/test/Transforms/NewGVN/tbaa.ll
index 3dcc4f8acc14..d48ededac03a 100644
--- a/test/Transforms/NewGVN/tbaa.ll
+++ b/test/Transforms/NewGVN/tbaa.ll
@@ -1,7 +1,7 @@
; RUN: opt -tbaa -basicaa -newgvn -S < %s | FileCheck %s
define i32 @test1(i8* %p, i8* %q) {
-; CHECK: @test1(i8* %p, i8* %q)
+; CHECK-LABEL: @test1(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p)
; CHECK-NOT: tbaa
; CHECK: %c = add i32 %a, %a
@@ -12,7 +12,7 @@ define i32 @test1(i8* %p, i8* %q) {
}
define i32 @test2(i8* %p, i8* %q) {
-; CHECK: @test2(i8* %p, i8* %q)
+; CHECK-LABEL: @test2(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !0
@@ -22,7 +22,7 @@ define i32 @test2(i8* %p, i8* %q) {
}
define i32 @test3(i8* %p, i8* %q) {
-; CHECK: @test3(i8* %p, i8* %q)
+; CHECK-LABEL: @test3(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !3
@@ -32,7 +32,7 @@ define i32 @test3(i8* %p, i8* %q) {
}
define i32 @test4(i8* %p, i8* %q) {
-; CHECK: @test4(i8* %p, i8* %q)
+; CHECK-LABEL: @test4(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !1
@@ -42,8 +42,8 @@ define i32 @test4(i8* %p, i8* %q) {
}
define i32 @test5(i8* %p, i8* %q) {
-; CHECK: @test5(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
+; CHECK-LABEL: @test5(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !0
%b = call i32 @foo(i8* %p), !tbaa !1
@@ -52,8 +52,8 @@ define i32 @test5(i8* %p, i8* %q) {
}
define i32 @test6(i8* %p, i8* %q) {
-; CHECK: @test6(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
+; CHECK-LABEL: @test6(i8* %p, i8* %q)
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]]
; CHECK: %c = add i32 %a, %a
%a = call i32 @foo(i8* %p), !tbaa !0
%b = call i32 @foo(i8* %p), !tbaa !3
@@ -62,7 +62,7 @@ define i32 @test6(i8* %p, i8* %q) {
}
define i32 @test7(i8* %p, i8* %q) {
-; CHECK: @test7(i8* %p, i8* %q)
+; CHECK-LABEL: @test7(i8* %p, i8* %q)
; CHECK: call i32 @foo(i8* %p)
; CHECK-NOT: tbaa
; CHECK: %c = add i32 %a, %a
@@ -72,10 +72,8 @@ define i32 @test7(i8* %p, i8* %q) {
ret i32 %c
}
-
-
define i32 @test8(i32* %p, i32* %q) {
-; CHECK-LABEL: test8
+; CHECK-LABEL: @test8
; CHECK-NEXT: store i32 15, i32* %p
; CHECK-NEXT: ret i32 0
; Since we know the location is invariant, we can forward the
@@ -87,8 +85,9 @@ define i32 @test8(i32* %p, i32* %q) {
%c = sub i32 %a, %b
ret i32 %c
}
+
define i32 @test9(i32* %p, i32* %q) {
-; CHECK-LABEL: test9
+; CHECK-LABEL: @test9
; CHECK-NEXT: call void @clobber()
; CHECK-NEXT: ret i32 0
; Since we know the location is invariant, we can forward the
@@ -101,16 +100,27 @@ define i32 @test9(i32* %p, i32* %q) {
ret i32 %c
}
+define i32 @test10(i8* %p, i8* %q) {
+; If one access encloses the other, then the merged access is the enclosed one
+; and not just the common final access type.
+; CHECK-LABEL: @test10
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAG_X_i:!.*]]
+; CHECK: %c = add i32 %a, %a
+ %a = call i32 @foo(i8* %p), !tbaa !15 ; TAG_X_i
+ %b = call i32 @foo(i8* %p), !tbaa !19 ; TAG_Y_x_i
+ %c = add i32 %a, %b
+ ret i32 %c
+}
declare void @clobber()
declare i32 @foo(i8*) readonly
-; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
-; CHECK: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]}
-; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
-; CHECK: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0}
-; CHECK: [[TYPEB]] = !{!"B", [[TYPEA]]}
-; CHECK: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0}
+; CHECK-DAG: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
+; CHECK-DAG: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]}
+; CHECK-DAG: [[TYPEA]] = !{!"A", !{{.*}}}
+; CHECK-DAG: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0}
+; CHECK-DAG: [[TYPEB]] = !{!"B", [[TYPEA]]}
+; CHECK-DAG: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0}
!0 = !{!5, !5, i64 0}
!1 = !{!6, !6, i64 0}
!2 = !{!"tbaa root"}
@@ -122,8 +132,17 @@ declare i32 @foo(i8*) readonly
!8 = !{!"another root"}
!11 = !{!"scalar type", !8}
+; CHECK-DAG: [[TAG_X_i]] = !{[[TYPE_X:!.*]], [[TYPE_int:!.*]], i64 0}
+; CHECK-DAG: [[TYPE_X:!.*]] = !{!"struct X", [[TYPE_int]], i64 0}
+; CHECK-DAG: [[TYPE_int]] = !{!"int", {{!.*}}, i64 0}
+!15 = !{!16, !17, i64 0} ; TAG_X_i
+!16 = !{!"struct X", !17, i64 0} ; struct X { int i; };
+!17 = !{!"int", !18, i64 0}
+!18 = !{!"char", !2, i64 0}
-;; A TBAA structure who's only point is to have a constant location
+!19 = !{!20, !17, i64 0} ; TAG_Y_x_i
+!20 = !{!"struct Y", !16, i64 0} ; struct Y { struct X x; };
+
+; A TBAA structure whose only point is to have a constant location.
!9 = !{!"yet another root"}
!10 = !{!"node", !9, i64 1}
-
diff --git a/test/Transforms/PGOProfile/icp_covariant_call_return.ll b/test/Transforms/PGOProfile/icp_covariant_call_return.ll
index fc5054e3a574..aba075461deb 100644
--- a/test/Transforms/PGOProfile/icp_covariant_call_return.ll
+++ b/test/Transforms/PGOProfile/icp_covariant_call_return.ll
@@ -22,8 +22,7 @@ entry:
%vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8
%vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0
%tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8
-; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8*
-; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*)
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq %struct.Base* (%struct.B*)* %tmp3, bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to %struct.Base* (%struct.B*)*)
; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICALL-PROM:if.true.direct_targ:
; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D*
diff --git a/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll b/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll
index d2ff47dda0e6..0a4444783eb0 100644
--- a/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll
+++ b/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll
@@ -32,18 +32,19 @@ invoke.cont:
%vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8
%vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0
%tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8
-; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8*
-; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*)
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq %struct.Base* (%struct.B*)* %tmp3, bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to %struct.Base* (%struct.B*)*)
; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICALL-PROM:if.true.direct_targ:
; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D*
; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = invoke %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]])
-; ICALL-PROM: to label %if.end.icp unwind label %lpad
+; ICALL-PROM: to label %if.true.direct_targ.if.end.icp_crit_edge unwind label %lpad
+; ICALL-PROM:if.true.direct_targ.if.end.icp_crit_edge:
+; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base*
+; ICALL-PROM: br label %if.end.icp
; ICALL-PROM:if.false.orig_indirect:
; ICAll-PROM: %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1)
; ICAll-PROM: to label %invoke.cont1 unwind label %lpad
; ICALL-PROM:if.end.icp:
-; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base*
; ICALL-PROM: br label %invoke.cont1
%call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1)
to label %invoke.cont1 unwind label %lpad, !prof !1
diff --git a/test/Transforms/PGOProfile/icp_invoke.ll b/test/Transforms/PGOProfile/icp_invoke.ll
index 2ec564627aa1..1cacc1bc1aca 100644
--- a/test/Transforms/PGOProfile/icp_invoke.ll
+++ b/test/Transforms/PGOProfile/icp_invoke.ll
@@ -20,8 +20,7 @@ entry:
define i32 @_Z3goov() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
%tmp = load void ()*, void ()** @foo1, align 8
-; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast void ()* %tmp to i8*
-; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (void ()* @_ZL4bar1v to i8*)
+; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq void ()* %tmp, @_ZL4bar1v
; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICP:[[TRUE_LABEL_IC1]]:
; ICP: invoke void @_ZL4bar1v()
@@ -49,17 +48,19 @@ catch:
try.cont:
%tmp6 = load i32 ()*, i32 ()** @foo2, align 8
-; ICP: [[BITCAST_IC2:%[0-9]+]] = bitcast i32 ()* %tmp6 to i8*
-; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i8* [[BITCAST_IC2]], bitcast (i32 ()* @_ZL4bar2v to i8*)
+; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i32 ()* %tmp6, @_ZL4bar2v
; ICP: br i1 [[CMP_IC2]], label %[[TRUE_LABEL_IC2:.*]], label %[[FALSE_LABEL_IC2:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICP:[[TRUE_LABEL_IC2]]:
-; ICP: [[RESULT_IC2:%[0-9]+]] = invoke i32 @_ZL4bar2v()
-; ICP: to label %[[DCALL_NORMAL_DEST_IC2:.*]] unwind label %lpad1
+; ICP: [[RESULT_IC2_0:%[0-9]+]] = invoke i32 @_ZL4bar2v()
+; ICP: to label %[[MERGE_BB:.*]] unwind label %lpad1
; ICP:[[FALSE_LABEL_IC2]]:
+; ICP: [[RESULT_IC2_1:%.+]] = invoke i32 %tmp6()
+; ICP: to label %[[MERGE_BB]] unwind label %lpad1
%call = invoke i32 %tmp6()
to label %try.cont8 unwind label %lpad1, !prof !3
-; ICP:[[DCALL_NORMAL_DEST_IC2]]:
+; ICP:[[MERGE_BB]]:
+; ICP: [[MERGE_PHI:%.+]] = phi i32 [ [[RESULT_IC2_1]], %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2_0]], %[[TRUE_LABEL_IC2]] ]
; ICP: br label %try.cont8
lpad1:
%tmp7 = landingpad { i8*, i32 }
@@ -77,7 +78,7 @@ catch6:
try.cont8:
%i.0 = phi i32 [ undef, %catch6 ], [ %call, %try.cont ]
-; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ %call, %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2]], %[[DCALL_NORMAL_DEST_IC2]] ]
+; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ [[MERGE_PHI]], %[[MERGE_BB]] ]
ret i32 %i.0
eh.resume:
diff --git a/test/Transforms/PGOProfile/icp_invoke_nouse.ll b/test/Transforms/PGOProfile/icp_invoke_nouse.ll
index 5a1e6358cb61..096d2e0f222e 100644
--- a/test/Transforms/PGOProfile/icp_invoke_nouse.ll
+++ b/test/Transforms/PGOProfile/icp_invoke_nouse.ll
@@ -18,8 +18,7 @@ entry:
if.end: ; preds = %entry
%fptr = load i32 ()*, i32 ()** @pfptr, align 8
-; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast i32 ()* %fptr to i8*
-; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (i32 ()* @_ZL4bar1v to i8*)
+; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i32 ()* %fptr, @_ZL4bar1v
; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICP:[[TRUE_LABEL_IC1]]:
; ICP: invoke i32 @_ZL4bar1v()
diff --git a/test/Transforms/PGOProfile/icp_vararg.ll b/test/Transforms/PGOProfile/icp_vararg.ll
index 400aab3aead7..ec243470290a 100644
--- a/test/Transforms/PGOProfile/icp_vararg.ll
+++ b/test/Transforms/PGOProfile/icp_vararg.ll
@@ -13,8 +13,7 @@ entry:
define i32 @bar() #1 {
entry:
%tmp = load i32 (i32, ...)*, i32 (i32, ...)** @foo, align 8
-; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 (i32, ...)* %tmp to i8*
-; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 (i32, ...)* @va_func to i8*)
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i32 (i32, ...)* %tmp, @va_func
; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICALL-PROM:if.true.direct_targ:
; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 (i32, ...) @va_func(i32 3, i32 12, i32 22, i32 4)
diff --git a/test/Transforms/PGOProfile/indirect_call_promotion.ll b/test/Transforms/PGOProfile/indirect_call_promotion.ll
index 6832fecfaed3..85df5260f199 100644
--- a/test/Transforms/PGOProfile/indirect_call_promotion.ll
+++ b/test/Transforms/PGOProfile/indirect_call_promotion.ll
@@ -43,8 +43,7 @@ entry:
define i32 @bar() {
entry:
%tmp = load i32 ()*, i32 ()** @foo, align 8
-; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 ()* %tmp to i8*
-; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 ()* @func4 to i8*)
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i32 ()* %tmp, @func4
; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
; ICALL-PROM: if.true.direct_targ:
; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4()
diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index 4def8ce561c0..557a83a75626 100644
--- a/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -11,16 +11,20 @@
define i32 @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 8, i32 3
-; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP7]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
-; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 2), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 8, i32 3
+; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll
deleted file mode 100644
index 5fc0298b6cef..000000000000
--- a/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll
+++ /dev/null
@@ -1,125 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s
-
-
-;void jumble (int * restrict A, int * restrict B) {
- ; int tmp0 = A[10]*A[0];
- ; int tmp1 = A[11]*A[1];
- ; int tmp2 = A[12]*A[3];
- ; int tmp3 = A[13]*A[2];
- ; B[0] = tmp0;
- ; B[1] = tmp1;
- ; B[2] = tmp2;
- ; B[3] = tmp3;
- ;}
-
-
- ; Function Attrs: norecurse nounwind uwtable
- define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
-; CHECK-LABEL: @jumble1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
-; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
-; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
-; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
-; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %arrayidx = getelementptr inbounds i32, i32* %A, i64 10
- %0 = load i32, i32* %arrayidx, align 4
- %1 = load i32, i32* %A, align 4
- %mul = mul nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 11
- %2 = load i32, i32* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
- %3 = load i32, i32* %arrayidx3, align 4
- %mul4 = mul nsw i32 %2, %3
- %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 12
- %4 = load i32, i32* %arrayidx5, align 4
- %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 3
- %5 = load i32, i32* %arrayidx6, align 4
- %mul7 = mul nsw i32 %4, %5
- %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 13
- %6 = load i32, i32* %arrayidx8, align 4
- %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 2
- %7 = load i32, i32* %arrayidx9, align 4
- %mul10 = mul nsw i32 %6, %7
- store i32 %mul, i32* %B, align 4
- %arrayidx12 = getelementptr inbounds i32, i32* %B, i64 1
- store i32 %mul4, i32* %arrayidx12, align 4
- %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 2
- store i32 %mul7, i32* %arrayidx13, align 4
- %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3
- store i32 %mul10, i32* %arrayidx14, align 4
- ret void
- }
-
-;Reversing the operand of MUL
- ; Function Attrs: norecurse nounwind uwtable
- define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
-; CHECK-LABEL: @jumble2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
-; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
-; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP1]]
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
-; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
-; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %arrayidx = getelementptr inbounds i32, i32* %A, i64 10
- %0 = load i32, i32* %arrayidx, align 4
- %1 = load i32, i32* %A, align 4
- %mul = mul nsw i32 %1, %0
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 11
- %2 = load i32, i32* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
- %3 = load i32, i32* %arrayidx3, align 4
- %mul4 = mul nsw i32 %3, %2
- %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 12
- %4 = load i32, i32* %arrayidx5, align 4
- %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 3
- %5 = load i32, i32* %arrayidx6, align 4
- %mul7 = mul nsw i32 %5, %4
- %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 13
- %6 = load i32, i32* %arrayidx8, align 4
- %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 2
- %7 = load i32, i32* %arrayidx9, align 4
- %mul10 = mul nsw i32 %7, %6
- store i32 %mul, i32* %B, align 4
- %arrayidx12 = getelementptr inbounds i32, i32* %B, i64 1
- store i32 %mul4, i32* %arrayidx12, align 4
- %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 2
- store i32 %mul7, i32* %arrayidx13, align 4
- %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3
- store i32 %mul10, i32* %arrayidx14, align 4
- ret void
- }
-
diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
deleted file mode 100644
index 568fd9f3ac79..000000000000
--- a/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
+++ /dev/null
@@ -1,225 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s
-
-;void phiUsingLoads(int *restrict A, int *restrict B) {
-; int tmp0, tmp1, tmp2, tmp3;
-; for (int i = 0; i < 100; i++) {
-; if (A[0] == 0) {
-; tmp0 = A[i + 0];
-; tmp1 = A[i + 1];
-; tmp2 = A[i + 2];
-; tmp3 = A[i + 3];
-; } else if (A[25] == 0) {
-; tmp0 = A[i + 0];
-; tmp1 = A[i + 1];
-; tmp2 = A[i + 2];
-; tmp3 = A[i + 3];
-; } else if (A[50] == 0) {
-; tmp0 = A[i + 0];
-; tmp1 = A[i + 1];
-; tmp2 = A[i + 2];
-; tmp3 = A[i + 3];
-; } else if (A[75] == 0) {
-; tmp0 = A[i + 0];
-; tmp1 = A[i + 1];
-; tmp2 = A[i + 3];
-; tmp3 = A[i + 2];
-; }
-; }
-; B[0] = tmp0;
-; B[1] = tmp1;
-; B[2] = tmp2;
-; B[3] = tmp3;
-;}
-
-
-; Function Attrs: norecurse nounwind uwtable
-define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) local_unnamed_addr #0 {
-; CHECK-LABEL: @phiUsingLoads(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 25
-; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 50
-; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 75
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
-; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
-; CHECK-NEXT: [[ARRAYIDX66:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP27:%.*]], <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: ret void
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP27]], [[FOR_INC]] ]
-; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: if.else:
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4
-; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[CMP13]], label [[IF_THEN14:%.*]], label [[IF_ELSE27:%.*]]
-; CHECK: if.then14:
-; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>*
-; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: if.else27:
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4
-; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP14]], 0
-; CHECK-NEXT: br i1 [[CMP29]], label [[IF_THEN30:%.*]], label [[IF_ELSE43:%.*]]
-; CHECK: if.then30:
-; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>*
-; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: if.else43:
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4
-; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP20]], 0
-; CHECK-NEXT: br i1 [[CMP45]], label [[IF_THEN46:%.*]], label [[FOR_INC]]
-; CHECK: if.then46:
-; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>*
-; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
-; CHECK-NEXT: br label [[FOR_INC]]
-; CHECK: for.inc:
-; CHECK-NEXT: [[TMP27]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[TMP26]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ]
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-;
-entry:
- %0 = load i32, i32* %A, align 4
- %cmp1 = icmp eq i32 %0, 0
- %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 25
- %arrayidx28 = getelementptr inbounds i32, i32* %A, i64 50
- %arrayidx44 = getelementptr inbounds i32, i32* %A, i64 75
- br label %for.body
-
-for.cond.cleanup: ; preds = %for.inc
- store i32 %tmp0.1, i32* %B, align 4
- %arrayidx64 = getelementptr inbounds i32, i32* %B, i64 1
- store i32 %tmp1.1, i32* %arrayidx64, align 4
- %arrayidx65 = getelementptr inbounds i32, i32* %B, i64 2
- store i32 %tmp2.1, i32* %arrayidx65, align 4
- %arrayidx66 = getelementptr inbounds i32, i32* %B, i64 3
- store i32 %tmp3.1, i32* %arrayidx66, align 4
- ret void
-
-for.body: ; preds = %for.inc, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
- %tmp3.0111 = phi i32 [ undef, %entry ], [ %tmp3.1, %for.inc ]
- %tmp2.0110 = phi i32 [ undef, %entry ], [ %tmp2.1, %for.inc ]
- %tmp1.0109 = phi i32 [ undef, %entry ], [ %tmp1.1, %for.inc ]
- %tmp0.0108 = phi i32 [ undef, %entry ], [ %tmp0.1, %for.inc ]
- br i1 %cmp1, label %if.then, label %if.else
-
-if.then: ; preds = %for.body
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
- %1 = load i32, i32* %arrayidx2, align 4
- %2 = add nuw nsw i64 %indvars.iv, 1
- %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %2
- %3 = load i32, i32* %arrayidx5, align 4
- %4 = add nuw nsw i64 %indvars.iv, 2
- %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %4
- %5 = load i32, i32* %arrayidx8, align 4
- %6 = add nuw nsw i64 %indvars.iv, 3
- %arrayidx11 = getelementptr inbounds i32, i32* %A, i64 %6
- %7 = load i32, i32* %arrayidx11, align 4
- br label %for.inc
-
-if.else: ; preds = %for.body
- %8 = load i32, i32* %arrayidx12, align 4
- %cmp13 = icmp eq i32 %8, 0
- br i1 %cmp13, label %if.then14, label %if.else27
-
-if.then14: ; preds = %if.else
- %arrayidx17 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
- %9 = load i32, i32* %arrayidx17, align 4
- %10 = add nuw nsw i64 %indvars.iv, 1
- %arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %10
- %11 = load i32, i32* %arrayidx20, align 4
- %12 = add nuw nsw i64 %indvars.iv, 2
- %arrayidx23 = getelementptr inbounds i32, i32* %A, i64 %12
- %13 = load i32, i32* %arrayidx23, align 4
- %14 = add nuw nsw i64 %indvars.iv, 3
- %arrayidx26 = getelementptr inbounds i32, i32* %A, i64 %14
- %15 = load i32, i32* %arrayidx26, align 4
- br label %for.inc
-
-if.else27: ; preds = %if.else
- %16 = load i32, i32* %arrayidx28, align 4
- %cmp29 = icmp eq i32 %16, 0
- br i1 %cmp29, label %if.then30, label %if.else43
-
-if.then30: ; preds = %if.else27
- %arrayidx33 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
- %17 = load i32, i32* %arrayidx33, align 4
- %18 = add nuw nsw i64 %indvars.iv, 1
- %arrayidx36 = getelementptr inbounds i32, i32* %A, i64 %18
- %19 = load i32, i32* %arrayidx36, align 4
- %20 = add nuw nsw i64 %indvars.iv, 2
- %arrayidx39 = getelementptr inbounds i32, i32* %A, i64 %20
- %21 = load i32, i32* %arrayidx39, align 4
- %22 = add nuw nsw i64 %indvars.iv, 3
- %arrayidx42 = getelementptr inbounds i32, i32* %A, i64 %22
- %23 = load i32, i32* %arrayidx42, align 4
- br label %for.inc
-
-if.else43: ; preds = %if.else27
- %24 = load i32, i32* %arrayidx44, align 4
- %cmp45 = icmp eq i32 %24, 0
- br i1 %cmp45, label %if.then46, label %for.inc
-
-if.then46: ; preds = %if.else43
- %arrayidx49 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
- %25 = load i32, i32* %arrayidx49, align 4
- %26 = add nuw nsw i64 %indvars.iv, 1
- %arrayidx52 = getelementptr inbounds i32, i32* %A, i64 %26
- %27 = load i32, i32* %arrayidx52, align 4
- %28 = add nuw nsw i64 %indvars.iv, 3
- %arrayidx55 = getelementptr inbounds i32, i32* %A, i64 %28
- %29 = load i32, i32* %arrayidx55, align 4
- %30 = add nuw nsw i64 %indvars.iv, 2
- %arrayidx58 = getelementptr inbounds i32, i32* %A, i64 %30
- %31 = load i32, i32* %arrayidx58, align 4
- br label %for.inc
-
-for.inc: ; preds = %if.then, %if.then30, %if.else43, %if.then46, %if.then14
- %tmp0.1 = phi i32 [ %1, %if.then ], [ %9, %if.then14 ], [ %17, %if.then30 ], [ %25, %if.then46 ], [ %tmp0.0108, %if.else43 ]
- %tmp1.1 = phi i32 [ %3, %if.then ], [ %11, %if.then14 ], [ %19, %if.then30 ], [ %27, %if.then46 ], [ %tmp1.0109, %if.else43 ]
- %tmp2.1 = phi i32 [ %5, %if.then ], [ %13, %if.then14 ], [ %21, %if.then30 ], [ %29, %if.then46 ], [ %tmp2.0110, %if.else43 ]
- %tmp3.1 = phi i32 [ %7, %if.then ], [ %15, %if.then14 ], [ %23, %if.then30 ], [ %31, %if.then46 ], [ %tmp3.0111, %if.else43 ]
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 100
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
index be58521ed898..06e051a90b0d 100644
--- a/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
+++ b/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
@@ -5,27 +5,34 @@
define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {
; CHECK-LABEL: @jumbled-load(
-; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
+; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* %in, i64 0
+; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, i32* [[IN_ADDR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3
+; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1
+; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
+; CHECK-NEXT: [[LOAD_4:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* %inn, i64 0
+; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, i32* [[INN_ADDR]], align 4
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2
+; CHECK-NEXT: [[LOAD_6:%.*]] = load i32, i32* [[GEP_4]], align 4
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3
+; CHECK-NEXT: [[LOAD_7:%.*]] = load i32, i32* [[GEP_5]], align 4
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
-; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP3]], [[TMP6]]
-; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
-; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
-; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
-; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT: [[LOAD_8:%.*]] = load i32, i32* [[GEP_6]], align 4
+; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[LOAD_3]], [[LOAD_5]]
+; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[LOAD_2]], [[LOAD_8]]
+; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[LOAD_4]], [[LOAD_7]]
+; CHECK-NEXT: [[MUL_4:%.*]] = mul i32 [[LOAD_1]], [[LOAD_6]]
+; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* %out, i64 0
+; CHECK-NEXT: store i32 [[MUL_1]], i32* [[GEP_7]], align 4
+; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* %out, i64 1
+; CHECK-NEXT: store i32 [[MUL_2]], i32* [[GEP_8]], align 4
+; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* %out, i64 2
+; CHECK-NEXT: store i32 [[MUL_3]], i32* [[GEP_9]], align 4
+; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* %out, i64 3
+; CHECK-NEXT: store i32 [[MUL_4]], i32* [[GEP_10]], align 4
; CHECK-NEXT: ret i32 undef
;
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
diff --git a/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
index 6ae763520013..1b2c76384e0b 100644
--- a/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
+++ b/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
@@ -6,26 +6,33 @@
define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {
; CHECK-LABEL: @jumbled-load(
; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
+; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, i32* [[IN_ADDR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1
+; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
+; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+; CHECK-NEXT: [[LOAD_4:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
+; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, i32* [[INN_ADDR]], align 4
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1
+; CHECK-NEXT: [[LOAD_6:%.*]] = load i32, i32* [[GEP_4]], align 4
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2
+; CHECK-NEXT: [[LOAD_7:%.*]] = load i32, i32* [[GEP_5]], align 4
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
-; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP3]], [[TMP6]]
+; CHECK-NEXT: [[LOAD_8:%.*]] = load i32, i32* [[GEP_6]], align 4
+; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[LOAD_1]], [[LOAD_5]]
+; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[LOAD_2]], [[LOAD_6]]
+; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[LOAD_3]], [[LOAD_7]]
+; CHECK-NEXT: [[MUL_4:%.*]] = mul i32 [[LOAD_4]], [[LOAD_8]]
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT: store i32 [[MUL_1]], i32* [[GEP_9]], align 4
+; CHECK-NEXT: store i32 [[MUL_2]], i32* [[GEP_7]], align 4
+; CHECK-NEXT: store i32 [[MUL_3]], i32* [[GEP_10]], align 4
+; CHECK-NEXT: store i32 [[MUL_4]], i32* [[GEP_8]], align 4
; CHECK-NEXT: ret i32 undef
;
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
diff --git a/test/Transforms/SampleProfile/entry_counts.ll b/test/Transforms/SampleProfile/entry_counts.ll
index 6137a6908cf5..cab7c87e0493 100644
--- a/test/Transforms/SampleProfile/entry_counts.ll
+++ b/test/Transforms/SampleProfile/entry_counts.ll
@@ -9,8 +9,8 @@ entry:
ret void, !dbg !9
}
-; This function does not have profile, check if function_entry_count is 0
-; CHECK: {{.*}} = !{!"function_entry_count", i64 0}
+; This function does not have profile, check if function_entry_count is -1
+; CHECK: {{.*}} = !{!"function_entry_count", i64 -1}
define void @no_profile() {
entry:
ret void
diff --git a/test/Transforms/SimplifyCFG/X86/if-conversion.ll b/test/Transforms/SimplifyCFG/X86/if-conversion.ll
new file mode 100644
index 000000000000..28702572d480
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/X86/if-conversion.ll
@@ -0,0 +1,231 @@
+; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s
+; Avoid if-conversion if there is a long dependence chain.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; The first several cases test FindLongDependenceChain returns true, so
+; if-conversion is blocked.
+
+define i64 @test1(i64** %pp, i64* %p) {
+entry:
+ %0 = load i64*, i64** %pp, align 8
+ %1 = load i64, i64* %0, align 8
+ %cmp = icmp slt i64 %1, 0
+ %pint = ptrtoint i64* %p to i64
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %p1 = add i64 %pint, 8
+ br label %cond.end
+
+cond.false:
+ %p2 = or i64 %pint, 16
+ br label %cond.end
+
+cond.end:
+ %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+ %ptr = inttoptr i64 %p3 to i64*
+ %val = load i64, i64* %ptr, align 8
+ ret i64 %val
+
+; CHECK-NOT: select
+}
+
+define i64 @test2(i64** %pp, i64* %p) {
+entry:
+ %0 = load i64*, i64** %pp, align 8
+ %1 = load i64, i64* %0, align 8
+ %cmp = icmp slt i64 %1, 0
+ %pint = ptrtoint i64* %p to i64
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %p1 = add i64 %pint, 8
+ br label %cond.end
+
+cond.false:
+ %p2 = add i64 %pint, 16
+ br label %cond.end
+
+cond.end:
+ %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+ %ptr = inttoptr i64 %p3 to i64*
+ %val = load i64, i64* %ptr, align 8
+ ret i64 %val
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: select
+}
+
+; The following cases test FindLongDependenceChain returns false, so
+; if-conversion will proceed.
+
+; Non trivial LatencyAdjustment.
+define i64 @test3(i64** %pp, i64* %p) {
+entry:
+ %0 = load i64*, i64** %pp, align 8
+ %1 = load i64, i64* %0, align 8
+ %cmp = icmp slt i64 %1, 0
+ %pint = ptrtoint i64* %p to i64
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %p1 = add i64 %pint, 8
+ br label %cond.end
+
+cond.false:
+ %p2 = or i64 %pint, 16
+ br label %cond.end
+
+cond.end:
+ %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+ %p4 = add i64 %p3, %1
+ %ptr = inttoptr i64 %p4 to i64*
+ %val = load i64, i64* %ptr, align 8
+ ret i64 %val
+
+; CHECK-LABEL: @test3
+; CHECK: select
+}
+
+; Short dependence chain.
+define i64 @test4(i64* %pp, i64* %p) {
+entry:
+ %0 = load i64, i64* %pp, align 8
+ %cmp = icmp slt i64 %0, 0
+ %pint = ptrtoint i64* %p to i64
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %p1 = add i64 %pint, 8
+ br label %cond.end
+
+cond.false:
+ %p2 = or i64 %pint, 16
+ br label %cond.end
+
+cond.end:
+ %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+ %ptr = inttoptr i64 %p3 to i64*
+ %val = load i64, i64* %ptr, align 8
+ ret i64 %val
+
+; CHECK-LABEL: @test4
+; CHECK: select
+}
+
+; High IPC.
+define i64 @test5(i64** %pp, i64* %p) {
+entry:
+ %0 = load i64*, i64** %pp, align 8
+ %1 = load i64, i64* %0, align 8
+ %cmp = icmp slt i64 %1, 0
+ %pint = ptrtoint i64* %p to i64
+ %2 = add i64 %pint, 2
+ %3 = add i64 %pint, 3
+ %4 = or i64 %pint, 16
+ %5 = and i64 %pint, 255
+
+ %6 = or i64 %2, 9
+ %7 = and i64 %3, 255
+ %8 = add i64 %4, 4
+ %9 = add i64 %5, 5
+
+ %10 = add i64 %6, 2
+ %11 = add i64 %7, 3
+ %12 = add i64 %8, 4
+ %13 = add i64 %9, 5
+
+ %14 = add i64 %10, 6
+ %15 = add i64 %11, 7
+ %16 = add i64 %12, 8
+ %17 = add i64 %13, 9
+
+ %18 = add i64 %14, 10
+ %19 = add i64 %15, 11
+ %20 = add i64 %16, 12
+ %21 = add i64 %17, 13
+
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %p1 = add i64 %pint, 8
+ br label %cond.end
+
+cond.false:
+ %p2 = or i64 %pint, 16
+ br label %cond.end
+
+cond.end:
+ %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+ %ptr = inttoptr i64 %p3 to i64*
+ %val = load i64, i64* %ptr, align 8
+
+ ret i64 %val
+
+; CHECK-LABEL: @test5
+; CHECK: select
+}
+
+; Large BB size.
+define i64 @test6(i64** %pp, i64* %p) {
+entry:
+ %0 = load i64*, i64** %pp, align 8
+ %1 = load i64, i64* %0, align 8
+ %cmp = icmp slt i64 %1, 0
+ %pint = ptrtoint i64* %p to i64
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %p1 = add i64 %pint, 8
+ br label %cond.end
+
+cond.false:
+ %p2 = or i64 %pint, 16
+ br label %cond.end
+
+cond.end:
+ %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
+ %ptr = inttoptr i64 %p3 to i64*
+ %val = load i64, i64* %ptr, align 8
+ %2 = add i64 %pint, 2
+ %3 = add i64 %pint, 3
+ %4 = add i64 %2, 4
+ %5 = add i64 %3, 5
+ %6 = add i64 %4, 6
+ %7 = add i64 %5, 7
+ %8 = add i64 %6, 6
+ %9 = add i64 %7, 7
+ %10 = add i64 %8, 6
+ %11 = add i64 %9, 7
+ %12 = add i64 %10, 6
+ %13 = add i64 %11, 7
+ %14 = add i64 %12, 6
+ %15 = add i64 %13, 7
+ %16 = add i64 %14, 6
+ %17 = add i64 %15, 7
+ %18 = add i64 %16, 6
+ %19 = add i64 %17, 7
+ %20 = add i64 %18, 6
+ %21 = add i64 %19, 7
+ %22 = add i64 %20, 6
+ %23 = add i64 %21, 7
+ %24 = add i64 %22, 6
+ %25 = add i64 %23, 7
+ %26 = add i64 %24, 6
+ %27 = add i64 %25, 7
+ %28 = add i64 %26, 6
+ %29 = add i64 %27, 7
+ %30 = add i64 %28, 6
+ %31 = add i64 %29, 7
+ %32 = add i64 %30, 8
+ %33 = add i64 %31, 9
+ %34 = add i64 %32, %33
+ %35 = and i64 %34, 255
+ %res = add i64 %val, %35
+
+ ret i64 %res
+
+; CHECK-LABEL: @test6
+; CHECK: select
+}