diff options
Diffstat (limited to 'test/Transforms')
29 files changed, 1391 insertions, 599 deletions
diff --git a/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll b/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll new file mode 100644 index 000000000000..e10b04a850af --- /dev/null +++ b/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll @@ -0,0 +1,139 @@ +; RUN: opt < %s -callsite-splitting -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s + +; CHECK-LABEL: @test_simple +; CHECK-LABEL: Header: +; CHECK-NEXT: br i1 undef, label %Tail.predBB1.split +; CHECK-LABEL: TBB: +; CHECK: br i1 %cmp, label %Tail.predBB2.split +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) +; CHECK-LABEL: Tail.predBB2.split: +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_simple(i32* %a, i32 %v, i32 %p) { +Header: + br i1 undef, label %Tail, label %End + +TBB: + %cmp = icmp eq i32* %a, null + br i1 %cmp, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +; CHECK-LABEL: @test_eq_eq_eq_untaken +; CHECK-LABEL: Header: +; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB1.split +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) +; CHECK-LABEL: Tail.predBB2.split: +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 99) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_eq_eq_eq_untaken2(i32* %a, i32 %v, i32 %p) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB1, label %Tail + +TBB1: + %cmp1 = 
icmp eq i32 %v, 1 + br i1 %cmp1, label %TBB2, label %End + +TBB2: + %cmp2 = icmp eq i32 %p, 99 + br i1 %cmp2, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +; CHECK-LABEL: @test_eq_ne_eq_untaken +; CHECK-LABEL: Header: +; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB1.split +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) +; CHECK-LABEL: Tail.predBB2.split: +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 99) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_eq_ne_eq_untaken(i32* %a, i32 %v, i32 %p) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB1, label %Tail + +TBB1: + %cmp1 = icmp ne i32 %v, 1 + br i1 %cmp1, label %TBB2, label %End + +TBB2: + %cmp2 = icmp eq i32 %p, 99 + br i1 %cmp2, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +; CHECK-LABEL: @test_header_header2_tbb +; CHECK: Header2: +; CHECK:br i1 %tobool2, label %Tail.predBB1.split, label %TBB1 +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) +; CHECK-LABEL: Tail.predBB2.split: +; NOTE: CallSiteSplitting cannot infer that %a is null here, as it currently +; only supports recording conditions along a single predecessor path. 
+; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 99) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_header_header2_tbb(i32* %a, i32 %v, i32 %p) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB1, label %Header2 + +Header2: + %tobool2 = icmp eq i32 %p, 10 + br i1 %tobool2, label %Tail, label %TBB1 + +TBB1: + %cmp1 = icmp eq i32 %v, 1 + br i1 %cmp1, label %TBB2, label %End + +TBB2: + %cmp2 = icmp eq i32 %p, 99 + br i1 %cmp2, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +define i32 @callee(i32* %a, i32 %v, i32 %p) { + ret i32 10 +} diff --git a/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll b/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll new file mode 100644 index 000000000000..ca41bd6fc5e1 --- /dev/null +++ b/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -callsite-splitting -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s + +define i32 @callee(i32*, i32, i32) { + ret i32 10 +} + +; CHECK-LABEL: @test_preds_equal +; CHECK-NOT: split +; CHECK: br i1 %cmp, label %Tail, label %Tail +define i32 @test_preds_equal(i32* %a, i32 %v, i32 %p) { +TBB: + %cmp = icmp eq i32* %a, null + br i1 %cmp, label %Tail, label %Tail +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r +} diff --git a/test/Transforms/CodeGenPrepare/section.ll b/test/Transforms/CodeGenPrepare/section.ll index 4f3144e7fc73..30598ba7afbe 100644 --- a/test/Transforms/CodeGenPrepare/section.ll +++ b/test/Transforms/CodeGenPrepare/section.ll @@ -4,33 +4,59 @@ target triple = "x86_64-pc-linux-gnu" ; This tests that hot/cold functions get correct section prefix assigned -; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; CHECK: 
hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] ; The entry is hot -define void @hot_func() !prof !15 { +define void @hot_func1() !prof !15 { ret void } -; For instrumentation based PGO, we should only look at entry counts, +; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID]] +; Entry is cold but inner block is hot +define void @hot_func2(i32 %n) !prof !16 { +entry: + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %n.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end, !prof !19 + +for.body: + %2 = load i32, i32* %i, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: + ret void +} + +; For instrumentation based PGO, we should only look at block counts, ; not call site VP metadata (which can exist on value profiled memcpy, ; or possibly left behind after static analysis based devirtualization). 
; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] define void @cold_func1() !prof !16 { - call void @hot_func(), !prof !17 - call void @hot_func(), !prof !17 + call void @hot_func1(), !prof !17 + call void @hot_func1(), !prof !17 ret void } -; CHECK: cold_func2{{.*}}!section_prefix +; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]] define void @cold_func2() !prof !16 { - call void @hot_func(), !prof !17 - call void @hot_func(), !prof !18 - call void @hot_func(), !prof !18 + call void @hot_func1(), !prof !17 + call void @hot_func1(), !prof !18 + call void @hot_func1(), !prof !18 ret void } ; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]] define void @cold_func3() !prof !16 { - call void @hot_func(), !prof !18 + call void @hot_func1(), !prof !18 ret void } @@ -55,3 +81,4 @@ define void @cold_func3() !prof !16 { !16 = !{!"function_entry_count", i64 1} !17 = !{!"branch_weights", i32 80} !18 = !{!"branch_weights", i32 1} +!19 = !{!"branch_weights", i32 1000, i32 1} diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll index 7c05fda6cb8f..5cb4e0359970 100644 --- a/test/Transforms/GVN/tbaa.ll +++ b/test/Transforms/GVN/tbaa.ll @@ -1,7 +1,7 @@ ; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s define i32 @test1(i8* %p, i8* %q) { -; CHECK: @test1(i8* %p, i8* %q) +; CHECK-LABEL: @test1(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p) ; CHECK-NOT: tbaa ; CHECK: %c = add i32 %a, %a @@ -12,7 +12,7 @@ define i32 @test1(i8* %p, i8* %q) { } define i32 @test2(i8* %p, i8* %q) { -; CHECK: @test2(i8* %p, i8* %q) +; CHECK-LABEL: @test2(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 @@ -22,7 +22,7 @@ define i32 @test2(i8* %p, i8* %q) { } define i32 @test3(i8* %p, i8* %q) { -; CHECK: @test3(i8* %p, i8* %q) +; CHECK-LABEL: @test3(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !3 @@ 
-32,7 +32,7 @@ define i32 @test3(i8* %p, i8* %q) { } define i32 @test4(i8* %p, i8* %q) { -; CHECK: @test4(i8* %p, i8* %q) +; CHECK-LABEL: @test4(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !1 @@ -42,8 +42,8 @@ define i32 @test4(i8* %p, i8* %q) { } define i32 @test5(i8* %p, i8* %q) { -; CHECK: @test5(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] +; CHECK-LABEL: @test5(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !1 @@ -52,8 +52,8 @@ define i32 @test5(i8* %p, i8* %q) { } define i32 @test6(i8* %p, i8* %q) { -; CHECK: @test6(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] +; CHECK-LABEL: @test6(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !3 @@ -62,7 +62,7 @@ define i32 @test6(i8* %p, i8* %q) { } define i32 @test7(i8* %p, i8* %q) { -; CHECK: @test7(i8* %p, i8* %q) +; CHECK-LABEL: @test7(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p) ; CHECK-NOT: tbaa ; CHECK: %c = add i32 %a, %a @@ -72,10 +72,8 @@ define i32 @test7(i8* %p, i8* %q) { ret i32 %c } - - define i32 @test8(i32* %p, i32* %q) { -; CHECK-LABEL: test8 +; CHECK-LABEL: @test8 ; CHECK-NEXT: store i32 15, i32* %p ; CHECK-NEXT: ret i32 0 ; Since we know the location is invariant, we can forward the @@ -87,8 +85,9 @@ define i32 @test8(i32* %p, i32* %q) { %c = sub i32 %a, %b ret i32 %c } + define i32 @test9(i32* %p, i32* %q) { -; CHECK-LABEL: test9 +; CHECK-LABEL: @test9 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: ret i32 0 ; Since we know the location is invariant, we can forward the @@ -101,16 +100,27 @@ define i32 @test9(i32* %p, i32* %q) { ret i32 %c } +define i32 @test10(i8* %p, i8* %q) { +; If one access encloses the other, then the merged access is 
the enclosed one +; and not just the common final access type. +; CHECK-LABEL: @test10 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAG_X_i:!.*]] +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !15 ; TAG_X_i + %b = call i32 @foo(i8* %p), !tbaa !19 ; TAG_Y_x_i + %c = add i32 %a, %b + ret i32 %c +} declare void @clobber() declare i32 @foo(i8*) readonly -; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0} -; CHECK: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]} -; CHECK: [[TYPEA]] = !{!"A", !{{.*}}} -; CHECK: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0} -; CHECK: [[TYPEB]] = !{!"B", [[TYPEA]]} -; CHECK: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0} +; CHECK-DAG: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0} +; CHECK-DAG: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]} +; CHECK-DAG: [[TYPEA]] = !{!"A", !{{.*}}} +; CHECK-DAG: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0} +; CHECK-DAG: [[TYPEB]] = !{!"B", [[TYPEA]]} +; CHECK-DAG: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0} !0 = !{!5, !5, i64 0} !1 = !{!6, !6, i64 0} !2 = !{!"tbaa root"} @@ -122,8 +132,17 @@ declare i32 @foo(i8*) readonly !8 = !{!"another root"} !11 = !{!"scalar type", !8} +; CHECK-DAG: [[TAG_X_i]] = !{[[TYPE_X:!.*]], [[TYPE_int:!.*]], i64 0} +; CHECK-DAG: [[TYPE_X]] = !{!"struct X", [[TYPE_int]], i64 0} +; CHECK-DAG: [[TYPE_int]] = !{!"int", {{!.*}}, i64 0} +!15 = !{!16, !17, i64 0} ; TAG_X_i +!16 = !{!"struct X", !17, i64 0} ; struct X { int i; }; +!17 = !{!"int", !18, i64 0} +!18 = !{!"char", !2, i64 0} -;; A TBAA structure who's only point is to have a constant location +!19 = !{!20, !17, i64 0} ; TAG_Y_x_i +!20 = !{!"struct Y", !16, i64 0} ; struct Y { struct X x; }; + +; A TBAA structure whose only point is to have a constant location. 
!9 = !{!"yet another root"} !10 = !{!"node", !9, i64 1} - diff --git a/test/Transforms/Inline/AArch64/binop.ll b/test/Transforms/Inline/AArch64/binop.ll new file mode 100644 index 000000000000..051528991e46 --- /dev/null +++ b/test/Transforms/Inline/AArch64/binop.ll @@ -0,0 +1,291 @@ +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +declare void @pad() +@glbl = external global i32 + +define i32 @outer_add1(i32 %a) { +; CHECK-LABEL: @outer_add1( +; CHECK-NOT: call i32 @add + %C = call i32 @add(i32 %a, i32 0) + ret i32 %C +} + +define i32 @outer_add2(i32 %a) { +; CHECK-LABEL: @outer_add2( +; CHECK-NOT: call i32 @add + %C = call i32 @add(i32 0, i32 %a) + ret i32 %C +} + +define i32 @add(i32 %a, i32 %b) { + %add = add i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %add +} + + + +define i32 @outer_sub1(i32 %a) { +; CHECK-LABEL: @outer_sub1( +; CHECK-NOT: call i32 @sub1 + %C = call i32 @sub1(i32 %a, i32 0) + ret i32 %C +} + +define i32 @sub1(i32 %a, i32 %b) { + %sub = sub i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %sub +} + + +define i32 @outer_sub2(i32 %a) { +; CHECK-LABEL: @outer_sub2( +; CHECK-NOT: call i32 @sub2 + %C = call i32 @sub2(i32 %a) + ret i32 %C +} + +define i32 @sub2(i32 %a) { + %sub = sub i32 %a, %a + call void @pad() + ret i32 %sub +} + + + +define i32 @outer_mul1(i32 %a) { +; CHECK-LABEL: @outer_mul1( +; CHECK-NOT: call i32 @mul + %C = call i32 @mul(i32 %a, i32 0) + ret i32 %C +} + +define i32 @outer_mul2(i32 %a) { +; CHECK-LABEL: @outer_mul2( +; CHECK-NOT: call i32 @mul + %C = call i32 @mul(i32 %a, i32 1) + ret i32 %C +} + +define i32 @mul(i32 %a, i32 %b) { + %mul = mul i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %mul +} + + + +define i32 @outer_div1(i32 %a) { +; CHECK-LABEL: @outer_div1( +; CHECK-NOT: call i32 @div1 + %C = call 
i32 @div1(i32 0, i32 %a) + ret i32 %C +} + +define i32 @outer_div2(i32 %a) { +; CHECK-LABEL: @outer_div2( +; CHECK-NOT: call i32 @div1 + %C = call i32 @div1(i32 %a, i32 1) + ret i32 %C +} + +define i32 @div1(i32 %a, i32 %b) { + %div = sdiv i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %div +} + + +define i32 @outer_div3(i32 %a) { +; CHECK-LABEL: @outer_div3( +; CHECK-NOT: call i32 @div + %C = call i32 @div2(i32 %a) + ret i32 %C +} + +define i32 @div2(i32 %a) { + %div = sdiv i32 %a, %a + call void @pad() + ret i32 %div +} + + + +define i32 @outer_rem1(i32 %a) { +; CHECK-LABEL: @outer_rem1( +; CHECK-NOT: call i32 @rem + %C = call i32 @rem1(i32 0, i32 %a) + ret i32 %C +} + +define i32 @outer_rem2(i32 %a) { +; CHECK-LABEL: @outer_rem2( +; CHECK-NOT: call i32 @rem + %C = call i32 @rem1(i32 %a, i32 1) + ret i32 %C +} + +define i32 @rem1(i32 %a, i32 %b) { + %rem = urem i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %rem +} + + +define i32 @outer_rem3(i32 %a) { +; CHECK-LABEL: @outer_rem3( +; CHECK-NOT: call i32 @rem + %C = call i32 @rem2(i32 %a) + ret i32 %C +} + +define i32 @rem2(i32 %a) { + %rem = urem i32 %a, %a + call void @pad() + ret i32 %rem +} + + + +define i32 @outer_shl1(i32 %a) { +; CHECK-LABEL: @outer_shl1( +; CHECK-NOT: call i32 @shl + %C = call i32 @shl(i32 %a, i32 0) + ret i32 %C +} + +define i32 @shl(i32 %a, i32 %b) { + %shl = shl i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %shl +} + + + +define i32 @outer_shr1(i32 %a) { +; CHECK-LABEL: @outer_shr1( +; CHECK-NOT: call i32 @shr + %C = call i32 @shr(i32 %a, i32 0) + ret i32 %C +} + +define i32 @shr(i32 %a, i32 %b) { + %shr = ashr i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %shr +} + + + +define i1 @outer_and1(i1 %a) { +; CHECK-LABEL: @outer_and1( +; CHECK-NOT: call i1 @and1 + %c = call i1 @and1(i1 %a, i1 false) + ret i1 %c +} + +define i1 @outer_and2(i1 %a) { +; CHECK-LABEL: @outer_and2( +; CHECK-NOT: call i1 @and1 + %c = call 
i1 @and1(i1 %a, i1 true) + ret i1 %c +} + +define i1 @and1(i1 %a, i1 %b) { + %and = and i1 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i1 %and +} + + +define i1 @outer_and3(i1 %a) { +; CHECK-LABEL: @outer_and3( +; CHECK-NOT: call i1 @and2 + %c = call i1 @and2(i1 %a) + ret i1 %c +} + +define i1 @and2(i1 %a) { + %and = and i1 %a, %a + call void @pad() + ret i1 %and +} + + + +define i1 @outer_or1(i1 %a) { +; CHECK-LABEL: @outer_or1( +; CHECK-NOT: call i1 @or1 + %c = call i1 @or1(i1 %a, i1 false) + ret i1 %c +} + +define i1 @outer_or2(i1 %a) { +; CHECK-LABEL: @outer_or2( +; CHECK-NOT: call i1 @or1 + %c = call i1 @or1(i1 %a, i1 true) + ret i1 %c +} + +define i1 @or1(i1 %a, i1 %b) { + %or = or i1 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i1 %or +} + + +define i1 @outer_or3(i1 %a) { +; CHECK-LABEL: @outer_or3( +; CHECK-NOT: call i1 @or2 + %c = call i1 @or2(i1 %a) + ret i1 %c +} + +define i1 @or2(i1 %a) { + %or = or i1 %a, %a + call void @pad() + ret i1 %or +} + + + +define i1 @outer_xor1(i1 %a) { +; CHECK-LABEL: @outer_xor1( +; CHECK-NOT: call i1 @xor + %c = call i1 @xor1(i1 %a, i1 false) + ret i1 %c +} + +define i1 @xor1(i1 %a, i1 %b) { + %xor = xor i1 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i1 %xor +} + + +define i1 @outer_xor3(i1 %a) { +; CHECK-LABEL: @outer_xor3( +; CHECK-NOT: call i1 @xor + %c = call i1 @xor2(i1 %a) + ret i1 %c +} + +define i1 @xor2(i1 %a) { + %xor = xor i1 %a, %a + call void @pad() + ret i1 %xor +} diff --git a/test/Transforms/Inline/ARM/inline-fp.ll b/test/Transforms/Inline/ARM/inline-fp.ll new file mode 100644 index 000000000000..b4e76dfc7d2d --- /dev/null +++ b/test/Transforms/Inline/ARM/inline-fp.ll @@ -0,0 +1,113 @@ +; RUN: opt -S -inline -mtriple=arm-eabi -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=NOFP +; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=FULLFP +; RUN: opt -S 
-inline -mtriple=arm-eabi -mattr=+vfp2,+fp-only-sp -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP +; Make sure that soft float implementations are calculated as being more expensive +; to the inliner. + +; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75) +; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75) +; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) + +; FULLFP-DAG: single inlined into test_single with cost=0 (threshold=75) +; FULLFP-DAG: single inlined into test_single with cost=-15000 (threshold=75) +; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75) +; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75) +; FULLFP-DAG: double inlined into test_double with cost=0 (threshold=75) +; FULLFP-DAG: double inlined into test_double with cost=-15000 (threshold=75) +; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) + +; SINGLEFP-DAG: single inlined into test_single with cost=0 (threshold=75) +; SINGLEFP-DAG: single inlined into test_single with cost=-15000 (threshold=75) +; SINGLEFP-DAG: 
single_cheap inlined into test_single_cheap with cost=-15 (threshold=75) +; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75) +; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) + +define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 { + %call = call float @single(i32 %a, i8 zeroext %b) + %call2 = call float @single(i32 %c, i8 zeroext %d) + ret i32 0 +} + +define i32 @test_single_cheap(i32 %a, i8 %b, i32 %c, i8 %d) #0 { + %call = call float @single_cheap(i32 %a, i8 zeroext %b) + %call2 = call float @single_cheap(i32 %c, i8 zeroext %d) + ret i32 0 +} + +define i32 @test_double(i32 %a, i8 %b, i32 %c, i8 %d) #0 { + %call = call double @double(i32 %a, i8 zeroext %b) + %call2 = call double @double(i32 %c, i8 zeroext %d) + ret i32 0 +} + +define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 { + %call = call float @single_force_soft(i32 %a, i8 zeroext %b) #1 + %call2 = call float @single_force_soft(i32 %c, i8 zeroext %d) #1 + ret i32 0 +} + +define internal float @single(i32 %response, i8 zeroext %value1) #0 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to float + %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) + %mul = fmul float %0, 2.620000e+03 + %conv2 = sitofp i32 %response to float + %sub3 = fsub float %conv2, %mul + %div = fdiv float %sub3, %mul + ret float %div +} + +define internal float @single_cheap(i32 %response, i8 zeroext %value1) #0 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + 
%conv1 = bitcast i32 %sub to float + %conv2 = bitcast i32 %response to float + %0 = tail call float @llvm.pow.f32(float %conv2, float %conv1) + %1 = tail call float @llvm.pow.f32(float %0, float %0) + %2 = tail call float @llvm.pow.f32(float %1, float %1) + ret float %2 +} + +define internal double @double(i32 %response, i8 zeroext %value1) #0 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to double + %0 = tail call double @llvm.pow.f64(double 0x3FF028F5C0000000, double %conv1) + %mul = fmul double %0, 2.620000e+03 + %conv2 = sitofp i32 %response to double + %sub3 = fsub double %conv2, %mul + %div = fdiv double %sub3, %mul + ret double %div +} + +define internal float @single_force_soft(i32 %response, i8 zeroext %value1) #1 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to float + %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) + %mul = fmul float %0, 2.620000e+03 + %conv2 = sitofp i32 %response to float + %sub3 = fsub float %conv2, %mul + %div = fdiv float %sub3, %mul + ret float %div +} + +declare float @llvm.pow.f32(float, float) optsize minsize +declare double @llvm.pow.f64(double, double) optsize minsize + +attributes #0 = { optsize } +attributes #1 = { optsize "use-soft-float"="true" "target-features"="+soft-float" } diff --git a/test/Transforms/Inline/inline-fp.ll b/test/Transforms/Inline/inline-fp.ll deleted file mode 100644 index dd5972fe1b8a..000000000000 --- a/test/Transforms/Inline/inline-fp.ll +++ /dev/null @@ -1,137 +0,0 @@ -; RUN: opt -S -inline < %s | FileCheck %s -; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s -; Make sure that soft float implementations are calculated as being more expensive -; to the inliner. - -define i32 @test_nofp() #0 { -; f_nofp() has the "use-soft-float" attribute, so it should never get inlined. 
-; CHECK-LABEL: test_nofp -; CHECK: call float @f_nofp -entry: - %responseX = alloca i32, align 4 - %responseY = alloca i32, align 4 - %responseZ = alloca i32, align 4 - %valueX = alloca i8, align 1 - %valueY = alloca i8, align 1 - %valueZ = alloca i8, align 1 - - call void @getX(i32* %responseX, i8* %valueX) - call void @getY(i32* %responseY, i8* %valueY) - call void @getZ(i32* %responseZ, i8* %valueZ) - - %0 = load i32, i32* %responseX - %1 = load i8, i8* %valueX - %call = call float @f_nofp(i32 %0, i8 zeroext %1) - %2 = load i32, i32* %responseZ - %3 = load i8, i8* %valueZ - %call2 = call float @f_nofp(i32 %2, i8 zeroext %3) - %call3 = call float @fabsf(float %call) - %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000 - br i1 %cmp, label %if.end12, label %if.else - -if.else: ; preds = %entry - %4 = load i32, i32* %responseY - %5 = load i8, i8* %valueY - %call1 = call float @f_nofp(i32 %4, i8 zeroext %5) - %call4 = call float @fabsf(float %call1) - %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000 - br i1 %cmp5, label %if.end12, label %if.else7 - -if.else7: ; preds = %if.else - %call8 = call float @fabsf(float %call2) - %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000 - br i1 %cmp9, label %if.then10, label %if.end12 - -if.then10: ; preds = %if.else7 - br label %if.end12 - -if.end12: ; preds = %if.else, %entry, %if.then10, %if.else7 - %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ] - ret i32 %success.0 -} - -define i32 @test_hasfp() #0 { -; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined. 
-; CHECK-LABEL: test_hasfp -; CHECK-NOT: call float @f_hasfp -entry: - %responseX = alloca i32, align 4 - %responseY = alloca i32, align 4 - %responseZ = alloca i32, align 4 - %valueX = alloca i8, align 1 - %valueY = alloca i8, align 1 - %valueZ = alloca i8, align 1 - - call void @getX(i32* %responseX, i8* %valueX) - call void @getY(i32* %responseY, i8* %valueY) - call void @getZ(i32* %responseZ, i8* %valueZ) - - %0 = load i32, i32* %responseX - %1 = load i8, i8* %valueX - %call = call float @f_hasfp(i32 %0, i8 zeroext %1) - %2 = load i32, i32* %responseZ - %3 = load i8, i8* %valueZ - %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3) - %call3 = call float @fabsf(float %call) - %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000 - br i1 %cmp, label %if.end12, label %if.else - -if.else: ; preds = %entry - %4 = load i32, i32* %responseY - %5 = load i8, i8* %valueY - %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5) - %call4 = call float @fabsf(float %call1) - %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000 - br i1 %cmp5, label %if.end12, label %if.else7 - -if.else7: ; preds = %if.else - %call8 = call float @fabsf(float %call2) - %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000 - br i1 %cmp9, label %if.then10, label %if.end12 - -if.then10: ; preds = %if.else7 - br label %if.end12 - -if.end12: ; preds = %if.else, %entry, %if.then10, %if.else7 - %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ] - ret i32 %success.0 -} - -declare void @getX(i32*, i8*) #0 - -declare void @getY(i32*, i8*) #0 - -declare void @getZ(i32*, i8*) #0 - -define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 { -entry: - %conv = zext i8 %value1 to i32 - %sub = add nsw i32 %conv, -1 - %conv1 = sitofp i32 %sub to float - %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) - %mul = fmul float %0, 2.620000e+03 - %conv2 = sitofp i32 %response to float - %sub3 = fsub float %conv2, %mul - %div = fdiv float %sub3, %mul - ret 
float %div -} - -define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 { -entry: - %conv = zext i8 %value1 to i32 - %sub = add nsw i32 %conv, -1 - %conv1 = sitofp i32 %sub to float - %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) - %mul = fmul float %0, 2.620000e+03 - %conv2 = sitofp i32 %response to float - %sub3 = fsub float %conv2, %mul - %div = fdiv float %sub3, %mul - ret float %div -} - -declare float @fabsf(float) optsize minsize - -declare float @llvm.pow.f32(float, float) optsize minsize - -attributes #0 = { optsize } -attributes #1 = { optsize "use-soft-float"="true" } diff --git a/test/Transforms/Inline/redundant-loads.ll b/test/Transforms/Inline/redundant-loads.ll index 6b89f1db484b..176f605fc73b 100644 --- a/test/Transforms/Inline/redundant-loads.ll +++ b/test/Transforms/Inline/redundant-loads.ll @@ -184,3 +184,21 @@ define void @inner9(i32* %a, void ()* %f) { call void @pad() ret void } + + +define void @outer10(i32* %a) { +; CHECK-LABEL: @outer10( +; CHECK: call void @inner10 + %b = alloca i32 + call void @inner10(i32* %a, i32* %b) + ret void +} + +define void @inner10(i32* %a, i32* %b) { + %1 = load i32, i32* %a + store i32 %1, i32 * %b + %2 = load volatile i32, i32* %a ; volatile load should be kept. + call void @pad() + %3 = load volatile i32, i32* %a ; Same as the above. 
+ ret void +} diff --git a/test/Transforms/InstCombine/2011-09-03-Trampoline.ll b/test/Transforms/InstCombine/2011-09-03-Trampoline.ll index 1833558cbceb..7a315094a04e 100644 --- a/test/Transforms/InstCombine/2011-09-03-Trampoline.ll +++ b/test/Transforms/InstCombine/2011-09-03-Trampoline.ll @@ -5,18 +5,18 @@ declare i8* @llvm.adjust.trampoline(i8*) declare i32 @f(i8 * nest, i32) ; Most common case -define i32 @test0(i32 %n) { +define i32 @test0(i32 %n) !dbg !4 { %alloca = alloca [10 x i8], align 16 %gep = getelementptr [10 x i8], [10 x i8]* %alloca, i32 0, i32 0 call void @llvm.init.trampoline(i8* %gep, i8* bitcast (i32 (i8*, i32)* @f to i8*), i8* null) %tramp = call i8* @llvm.adjust.trampoline(i8* %gep) %function = bitcast i8* %tramp to i32(i32)* - %ret = call i32 %function(i32 %n) + %ret = call i32 %function(i32 %n), !dbg !10 ret i32 %ret -; CHECK: define i32 @test0(i32 %n) { -; CHECK: %ret = call i32 @f(i8* nest null, i32 %n) +; CHECK: define i32 @test0(i32 %n) !dbg !4 { +; CHECK: %ret = call i32 @f(i8* nest null, i32 %n), !dbg !10 } define i32 @test1(i32 %n, i8* %trampmem) { @@ -85,3 +85,18 @@ define i32 @test4(i32 %n) { ; CHECK: %ret1 = call i32 @f(i8* nest null, i32 %n) ; CHECK: %ret2 = call i32 @f(i8* nest null, i32 %n) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) +!1 = !DIFile(filename: "string.h", directory: "Game") +!2 = !{} +!3 = !{i32 1, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "passthru", scope: !1, file: !1, line: 79, type: !5, isLocal: true, isDefinition: true, scopeLine: 79, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !8) +!5 = !DISubroutineType(types: !6) +!6 = !{!7} +!7 = !DIDerivedType(tag: DW_TAG_pointer_type, scope: !0, baseType: null, size: 64, align: 64) +!8 = !{!9} +!9 = 
!DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 78, type: !7) +!10 = !DILocation(line: 78, column: 28, scope: !4) diff --git a/test/Transforms/JumpThreading/guards.ll b/test/Transforms/JumpThreading/guards.ll index 53175a7b7253..c760283f9e52 100644 --- a/test/Transforms/JumpThreading/guards.ll +++ b/test/Transforms/JumpThreading/guards.ll @@ -278,3 +278,106 @@ L2: L3: ret void } + +; Make sure that we don't PRE a non-speculable load across a guard. +define void @unsafe_pre_across_guard(i8* %p, i1 %load.is.valid) { + +; CHECK-LABEL: @unsafe_pre_across_guard( +; CHECK-NOT: loaded.pr +; CHECK: entry: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ] +; CHECK-NEXT: %loaded = load i8, i8* %p +; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0 +; CHECK-NEXT: br i1 %continue, label %exit, label %loop +entry: + br label %loop + +loop: ; preds = %loop, %entry + call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ] + %loaded = load i8, i8* %p + %continue = icmp eq i8 %loaded, 0 + br i1 %continue, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +; Make sure that we can safely PRE a speculable load across a guard. +define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) { + +; CHECK-LABEL: @safe_pre_across_guard( +; CHECK: entry: +; CHECK-NEXT: %loaded.pr = load i8, i8* %p +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %loaded = phi i8 [ %loaded, %loop ], [ %loaded.pr, %entry ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ] +; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0 +; CHECK-NEXT: br i1 %continue, label %exit, label %loop + +entry: + br label %loop + +loop: ; preds = %loop, %entry + call void (i1, ...) 
@llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ] + %loaded = load i8, i8* %p + %continue = icmp eq i8 %loaded, 0 + br i1 %continue, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +; Make sure that we don't PRE a non-speculable load across a call which may +; alias with the load. +define void @unsafe_pre_across_call(i8* %p) { + +; CHECK-LABEL: @unsafe_pre_across_call( +; CHECK-NOT: loaded.pr +; CHECK: entry: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: call i32 @f1() +; CHECK-NEXT: %loaded = load i8, i8* %p +; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0 +; CHECK-NEXT: br i1 %continue, label %exit, label %loop +entry: + br label %loop + +loop: ; preds = %loop, %entry + call i32 @f1() + %loaded = load i8, i8* %p + %continue = icmp eq i8 %loaded, 0 + br i1 %continue, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +; Make sure that we can safely PRE a speculable load across a call. +define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) { + +; CHECK-LABEL: @safe_pre_across_call( +; CHECK: entry: +; CHECK-NEXT: %loaded.pr = load i8, i8* %p +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %loaded = phi i8 [ %loaded, %loop ], [ %loaded.pr, %entry ] +; CHECK-NEXT: call i32 @f1() +; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0 +; CHECK-NEXT: br i1 %continue, label %exit, label %loop + +entry: + br label %loop + +loop: ; preds = %loop, %entry + call i32 @f1() + %loaded = load i8, i8* %p + %continue = icmp eq i8 %loaded, 0 + br i1 %continue, label %exit, label %loop + +exit: ; preds = %loop + ret void +} diff --git a/test/Transforms/LoopVectorize/legal_preheader_check.ll b/test/Transforms/LoopVectorize/legal_preheader_check.ll new file mode 100644 index 000000000000..32aa796394d6 --- /dev/null +++ b/test/Transforms/LoopVectorize/legal_preheader_check.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -loop-vectorize -debug -S -o /dev/null 2>&1 | FileCheck %s +; REQUIRES: asserts + 
+; D40973 +; Make sure LV legal bails out when the loop doesn't have a legal pre-header. + +; CHECK: LV: Loop doesn't have a legal pre-header. + +define void @inc(i32 %n, i8* %P) { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %BB1, label %BB2 + +BB1: + indirectbr i8* %P, [label %.lr.ph] + +BB2: + br label %.lr.ph + +.lr.ph: + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %BB1 ], [ 0, %BB2 ] + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: + ret void +} diff --git a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll new file mode 100644 index 000000000000..e3d1f6dd2b17 --- /dev/null +++ b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -memcpyopt -S | FileCheck %s +; Test memcpy-memcpy dependencies across invoke edges. + +; Test that memcpyopt works across the non-unwind edge of an invoke. + +define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %temp = alloca i8, i32 64 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) + invoke void @invoke_me() + to label %try.cont unwind label %lpad + +lpad: + landingpad { i8*, i32 } + catch i8* null + ret void + +try.cont: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) + ret void +} + +; Test that memcpyopt works across the unwind edge of an invoke. 
+ +define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %temp = alloca i8, i32 64 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) + invoke void @invoke_me() + to label %try.cont unwind label %lpad + +lpad: + landingpad { i8*, i32 } + catch i8* null + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) + ret void + +try.cont: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare i32 @__gxx_personality_v0(...) +declare void @invoke_me() readnone diff --git a/test/Transforms/MemCpyOpt/merge-into-memset.ll b/test/Transforms/MemCpyOpt/merge-into-memset.ll new file mode 100644 index 000000000000..fc31038a4e6d --- /dev/null +++ b/test/Transforms/MemCpyOpt/merge-into-memset.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -memcpyopt -S | FileCheck %s +; Update cached non-local dependence information when merging stores into memset. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Don't delete the memcpy in %if.then, even though it depends on an instruction +; which will be deleted. 
+ +; CHECK-LABEL: @foo +define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) { +entry: + %tmp = alloca [50 x i8], align 8 + %tmp4 = bitcast [50 x i8]* %tmp to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1 + call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5 + store i8 0, i8* %tmp4, align 8, !dbg !5 +; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i32 1, i1 false) + br i1 %c, label %if.then, label %exit + +if.then: +; CHECK: if.then: +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false) + br label %exit + +exit: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.rs", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !DILocation(line: 8, column: 5, scope: !6) +!6 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !8) +!8 = !{null} diff --git a/test/Transforms/MemCpyOpt/mixed-sizes.ll b/test/Transforms/MemCpyOpt/mixed-sizes.ll new file mode 100644 index 000000000000..9091fe7f56c0 --- /dev/null +++ b/test/Transforms/MemCpyOpt/mixed-sizes.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -memcpyopt -S | FileCheck %s +; Handle memcpy-memcpy dependencies of differing sizes correctly. 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Don't delete the second memcpy, even though there's an earlier +; memcpy with a larger size from the same address. + +; CHECK-LABEL: @foo +define i32 @foo(i1 %z) { +entry: + %a = alloca [10 x i32] + %s = alloca [10 x i32] + %0 = bitcast [10 x i32]* %a to i8* + %1 = bitcast [10 x i32]* %s to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false) + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0 + store i32 1, i32* %arrayidx + %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1 + %scevgep7 = bitcast i32* %scevgep to i8* + br i1 %z, label %for.body3.lr.ph, label %for.inc7.1 + +for.body3.lr.ph: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false) + br label %for.inc7.1 + +for.inc7.1: +; CHECK: for.inc7.1: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false) + %2 = load i32, i32* %arrayidx + ret i32 %2 +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) diff --git a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll new file mode 100644 index 000000000000..5b0510211d9f --- /dev/null +++ b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll @@ -0,0 +1,114 @@ +; RUN: opt < %s -memcpyopt -S | FileCheck %s +; Make sure memcpy-memcpy dependence is optimized across +; basic blocks (conditional branches and invokes). 
+ +%struct.s = type { i32, i32 } + +@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 +@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 +@i = external constant i8* + +declare void @qux() +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @__cxa_throw(i8*, i8*, i8*) +declare i32 @__gxx_personality_v0(...) +declare i8* @__cxa_begin_catch(i8*) + +; A simple partial redundancy. Test that the second memcpy is optimized +; to copy directly from the original source rather than from the temporary. + +; CHECK-LABEL: @wobble +define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) { +bb: + %temp = alloca i8, i32 64 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) + br i1 %some_condition, label %more, label %out + +out: + call void @qux() + unreachable + +more: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) + ret void +} + +; A CFG triangle with a partial redundancy targeting an alloca. Test that the +; memcpy inside the triangle is optimized to copy directly from the original +; source rather than from the temporary. 
+ +; CHECK-LABEL: @foo +define i32 @foo(i1 %t3) { +bb: + %s = alloca %struct.s, align 4 + %t = alloca %struct.s, align 4 + %s1 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) + br i1 %t3, label %bb4, label %bb7 + +bb4: ; preds = %bb + %t5 = bitcast %struct.s* %t to i8* + %s6 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* %s6, i64 8, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) + br label %bb7 + +bb7: ; preds = %bb4, %bb + %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0 + %t9 = load i32, i32* %t8, align 4 + %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1 + %t11 = load i32, i32* %t10, align 4 + %t12 = add i32 %t9, %t11 + ret i32 %t12 +} + +; A CFG diamond with an invoke on one side, and a partially redundant memcpy +; into an alloca on the other. Test that the memcpy inside the diamond is +; optimized to copy directly from the original source rather than from the +; temporary. This more complex test represents a relatively common usage +; pattern. 
+ +; CHECK-LABEL: @baz +define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +bb: + %s = alloca %struct.s, align 4 + %t = alloca %struct.s, align 4 + %s3 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) + br i1 %t5, label %bb6, label %bb22 + +bb6: ; preds = %bb + invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) + to label %bb25 unwind label %bb9 + +bb9: ; preds = %bb6 + %t10 = landingpad { i8*, i32 } + catch i8* null + br label %bb13 + +bb13: ; preds = %bb9 + %t15 = call i8* @__cxa_begin_catch(i8* null) + br label %bb23 + +bb22: ; preds = %bb + %t23 = bitcast %struct.s* %t to i8* + %s24 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* %s24, i64 8, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) + br label %bb23 + +bb23: ; preds = %bb22, %bb13 + %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0 + %t18 = load i32, i32* %t17, align 4 + %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1 + %t20 = load i32, i32* %t19, align 4 + %t21 = add nsw i32 %t18, %t20 + ret i32 %t21 + +bb25: ; preds = %bb6 + unreachable +} diff --git a/test/Transforms/NewGVN/tbaa.ll b/test/Transforms/NewGVN/tbaa.ll index 3dcc4f8acc14..d48ededac03a 100644 --- a/test/Transforms/NewGVN/tbaa.ll +++ b/test/Transforms/NewGVN/tbaa.ll @@ -1,7 +1,7 @@ ; RUN: opt -tbaa -basicaa -newgvn -S < %s | FileCheck %s define i32 @test1(i8* %p, i8* %q) { -; CHECK: @test1(i8* %p, i8* %q) +; CHECK-LABEL: @test1(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p) ; CHECK-NOT: tbaa ; CHECK: %c = add i32 %a, %a @@ -12,7 +12,7 @@ define i32 @test1(i8* %p, i8* %q) { } define i32 @test2(i8* %p, i8* 
%q) { -; CHECK: @test2(i8* %p, i8* %q) +; CHECK-LABEL: @test2(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 @@ -22,7 +22,7 @@ define i32 @test2(i8* %p, i8* %q) { } define i32 @test3(i8* %p, i8* %q) { -; CHECK: @test3(i8* %p, i8* %q) +; CHECK-LABEL: @test3(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !3 @@ -32,7 +32,7 @@ define i32 @test3(i8* %p, i8* %q) { } define i32 @test4(i8* %p, i8* %q) { -; CHECK: @test4(i8* %p, i8* %q) +; CHECK-LABEL: @test4(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !1 @@ -42,8 +42,8 @@ define i32 @test4(i8* %p, i8* %q) { } define i32 @test5(i8* %p, i8* %q) { -; CHECK: @test5(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] +; CHECK-LABEL: @test5(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !1 @@ -52,8 +52,8 @@ define i32 @test5(i8* %p, i8* %q) { } define i32 @test6(i8* %p, i8* %q) { -; CHECK: @test6(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] +; CHECK-LABEL: @test6(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !3 @@ -62,7 +62,7 @@ define i32 @test6(i8* %p, i8* %q) { } define i32 @test7(i8* %p, i8* %q) { -; CHECK: @test7(i8* %p, i8* %q) +; CHECK-LABEL: @test7(i8* %p, i8* %q) ; CHECK: call i32 @foo(i8* %p) ; CHECK-NOT: tbaa ; CHECK: %c = add i32 %a, %a @@ -72,10 +72,8 @@ define i32 @test7(i8* %p, i8* %q) { ret i32 %c } - - define i32 @test8(i32* %p, i32* %q) { -; CHECK-LABEL: test8 +; CHECK-LABEL: @test8 ; CHECK-NEXT: store i32 15, i32* %p ; CHECK-NEXT: ret i32 0 ; Since we know the location is invariant, we can 
forward the @@ -87,8 +85,9 @@ define i32 @test8(i32* %p, i32* %q) { %c = sub i32 %a, %b ret i32 %c } + define i32 @test9(i32* %p, i32* %q) { -; CHECK-LABEL: test9 +; CHECK-LABEL: @test9 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: ret i32 0 ; Since we know the location is invariant, we can forward the @@ -101,16 +100,27 @@ define i32 @test9(i32* %p, i32* %q) { ret i32 %c } +define i32 @test10(i8* %p, i8* %q) { +; If one access encloses the other, then the merged access is the enclosed one +; and not just the common final access type. +; CHECK-LABEL: @test10 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAG_X_i:!.*]] +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !15 ; TAG_X_i + %b = call i32 @foo(i8* %p), !tbaa !19 ; TAG_Y_x_i + %c = add i32 %a, %b + ret i32 %c +} declare void @clobber() declare i32 @foo(i8*) readonly -; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0} -; CHECK: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]} -; CHECK: [[TYPEA]] = !{!"A", !{{.*}}} -; CHECK: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0} -; CHECK: [[TYPEB]] = !{!"B", [[TYPEA]]} -; CHECK: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0} +; CHECK-DAG: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0} +; CHECK-DAG: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]} +; CHECK-DAG: [[TYPEA]] = !{!"A", !{{.*}}} +; CHECK-DAG: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0} +; CHECK-DAG: [[TYPEB]] = !{!"B", [[TYPEA]]} +; CHECK-DAG: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0} !0 = !{!5, !5, i64 0} !1 = !{!6, !6, i64 0} !2 = !{!"tbaa root"} @@ -122,8 +132,17 @@ declare i32 @foo(i8*) readonly !8 = !{!"another root"} !11 = !{!"scalar type", !8} +; CHECK-DAG: [[TAG_X_i]] = !{[[TYPE_X:!.*]], [[TYPE_int:!.*]], i64 0} +; CHECK-DAG: [[TYPE_X:!.*]] = !{!"struct X", [[TYPE_int]], i64 0} +; CHECK-DAG: [[TYPE_int]] = !{!"int", {{!.*}}, i64 0} +!15 = !{!16, !17, i64 0} ; TAG_X_i +!16 = !{!"struct X", !17, i64 0} ; struct X { int i; }; +!17 = !{!"int", !18, i64 0} +!18 = !{!"char", !2, i64 0} -;; A TBAA structure who's only 
point is to have a constant location +!19 = !{!20, !17, i64 0} ; TAG_Y_x_i +!20 = !{!"struct Y", !16, i64 0} ; struct Y { struct X x; }; + +; A TBAA structure whose only point is to have a constant location. !9 = !{!"yet another root"} !10 = !{!"node", !9, i64 1} - diff --git a/test/Transforms/PGOProfile/icp_covariant_call_return.ll b/test/Transforms/PGOProfile/icp_covariant_call_return.ll index fc5054e3a574..aba075461deb 100644 --- a/test/Transforms/PGOProfile/icp_covariant_call_return.ll +++ b/test/Transforms/PGOProfile/icp_covariant_call_return.ll @@ -22,8 +22,7 @@ entry: %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8 %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0 %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8 -; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8* -; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*) +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq %struct.Base* (%struct.B*)* %tmp3, bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to %struct.Base* (%struct.B*)*) ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICALL-PROM:if.true.direct_targ: ; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D* diff --git a/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll b/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll index d2ff47dda0e6..0a4444783eb0 100644 --- a/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll +++ b/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll @@ -32,18 +32,19 @@ invoke.cont: %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8 %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0 %tmp3 = load 
%struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8 -; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8* -; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*) +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq %struct.Base* (%struct.B*)* %tmp3, bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to %struct.Base* (%struct.B*)*) ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICALL-PROM:if.true.direct_targ: ; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D* ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = invoke %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]]) -; ICALL-PROM: to label %if.end.icp unwind label %lpad +; ICALL-PROM: to label %if.true.direct_targ.if.end.icp_crit_edge unwind label %lpad +; ICALL-PROM:if.true.direct_targ.if.end.icp_crit_edge: +; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base* +; ICALL-PROM: br label %if.end.icp ; ICALL-PROM:if.false.orig_indirect: ; ICAll-PROM: %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1) ; ICAll-PROM: to label %invoke.cont1 unwind label %lpad ; ICALL-PROM:if.end.icp: -; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base* ; ICALL-PROM: br label %invoke.cont1 %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1) to label %invoke.cont1 unwind label %lpad, !prof !1 diff --git a/test/Transforms/PGOProfile/icp_invoke.ll b/test/Transforms/PGOProfile/icp_invoke.ll index 2ec564627aa1..1cacc1bc1aca 100644 --- a/test/Transforms/PGOProfile/icp_invoke.ll +++ b/test/Transforms/PGOProfile/icp_invoke.ll @@ -20,8 +20,7 @@ entry: define i32 @_Z3goov() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: %tmp = load void ()*, void ()** @foo1, align 8 -; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast void ()* 
%tmp to i8* -; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (void ()* @_ZL4bar1v to i8*) +; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq void ()* %tmp, @_ZL4bar1v ; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICP:[[TRUE_LABEL_IC1]]: ; ICP: invoke void @_ZL4bar1v() @@ -49,17 +48,19 @@ catch: try.cont: %tmp6 = load i32 ()*, i32 ()** @foo2, align 8 -; ICP: [[BITCAST_IC2:%[0-9]+]] = bitcast i32 ()* %tmp6 to i8* -; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i8* [[BITCAST_IC2]], bitcast (i32 ()* @_ZL4bar2v to i8*) +; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i32 ()* %tmp6, @_ZL4bar2v ; ICP: br i1 [[CMP_IC2]], label %[[TRUE_LABEL_IC2:.*]], label %[[FALSE_LABEL_IC2:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICP:[[TRUE_LABEL_IC2]]: -; ICP: [[RESULT_IC2:%[0-9]+]] = invoke i32 @_ZL4bar2v() -; ICP: to label %[[DCALL_NORMAL_DEST_IC2:.*]] unwind label %lpad1 +; ICP: [[RESULT_IC2_0:%[0-9]+]] = invoke i32 @_ZL4bar2v() +; ICP: to label %[[MERGE_BB:.*]] unwind label %lpad1 ; ICP:[[FALSE_LABEL_IC2]]: +; ICP: [[RESULT_IC2_1:%.+]] = invoke i32 %tmp6() +; ICP: to label %[[MERGE_BB]] unwind label %lpad1 %call = invoke i32 %tmp6() to label %try.cont8 unwind label %lpad1, !prof !3 -; ICP:[[DCALL_NORMAL_DEST_IC2]]: +; ICP:[[MERGE_BB]]: +; ICP: [[MERGE_PHI:%.+]] = phi i32 [ [[RESULT_IC2_1]], %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2_0]], %[[TRUE_LABEL_IC2]] ] ; ICP: br label %try.cont8 lpad1: %tmp7 = landingpad { i8*, i32 } @@ -77,7 +78,7 @@ catch6: try.cont8: %i.0 = phi i32 [ undef, %catch6 ], [ %call, %try.cont ] -; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ %call, %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2]], %[[DCALL_NORMAL_DEST_IC2]] ] +; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ [[MERGE_PHI]], %[[MERGE_BB]] ] ret i32 %i.0 eh.resume: diff --git a/test/Transforms/PGOProfile/icp_invoke_nouse.ll b/test/Transforms/PGOProfile/icp_invoke_nouse.ll index 5a1e6358cb61..096d2e0f222e 100644 --- 
a/test/Transforms/PGOProfile/icp_invoke_nouse.ll +++ b/test/Transforms/PGOProfile/icp_invoke_nouse.ll @@ -18,8 +18,7 @@ entry: if.end: ; preds = %entry %fptr = load i32 ()*, i32 ()** @pfptr, align 8 -; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast i32 ()* %fptr to i8* -; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (i32 ()* @_ZL4bar1v to i8*) +; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i32 ()* %fptr, @_ZL4bar1v ; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICP:[[TRUE_LABEL_IC1]]: ; ICP: invoke i32 @_ZL4bar1v() diff --git a/test/Transforms/PGOProfile/icp_vararg.ll b/test/Transforms/PGOProfile/icp_vararg.ll index 400aab3aead7..ec243470290a 100644 --- a/test/Transforms/PGOProfile/icp_vararg.ll +++ b/test/Transforms/PGOProfile/icp_vararg.ll @@ -13,8 +13,7 @@ entry: define i32 @bar() #1 { entry: %tmp = load i32 (i32, ...)*, i32 (i32, ...)** @foo, align 8 -; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 (i32, ...)* %tmp to i8* -; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 (i32, ...)* @va_func to i8*) +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i32 (i32, ...)* %tmp, @va_func ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICALL-PROM:if.true.direct_targ: ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 (i32, ...) 
@va_func(i32 3, i32 12, i32 22, i32 4) diff --git a/test/Transforms/PGOProfile/indirect_call_promotion.ll b/test/Transforms/PGOProfile/indirect_call_promotion.ll index 6832fecfaed3..85df5260f199 100644 --- a/test/Transforms/PGOProfile/indirect_call_promotion.ll +++ b/test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -43,8 +43,7 @@ entry: define i32 @bar() { entry: %tmp = load i32 ()*, i32 ()** @foo, align 8 -; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 ()* %tmp to i8* -; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 ()* @func4 to i8*) +; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i32 ()* %tmp, @func4 ; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]] ; ICALL-PROM: if.true.direct_targ: ; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4() diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index 4def8ce561c0..557a83a75626 100644 --- a/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -11,16 +11,20 @@ define i32 @fn1() { ; CHECK-LABEL: @fn1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 8, i32 3 -; CHECK-NEXT: 
[[TMP8:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP7]], <4 x i32> <i32 6, i32 0, i32 0, i32 0> -; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 2), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 8, i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> <i32 6, i32 0, i32 0, i32 0> +; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll deleted file mode 100644 index 5fc0298b6cef..000000000000 --- a/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll +++ /dev/null @@ -1,125 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s - - -;void jumble (int * restrict A, int * restrict B) { - ; int tmp0 = A[10]*A[0]; - ; int tmp1 = A[11]*A[1]; - ; int tmp2 = A[12]*A[3]; - ; int tmp3 = A[13]*A[2]; - ; B[0] = tmp0; - ; B[1] = tmp1; - ; B[2] = tmp2; - ; B[3] = tmp3; - ;} - - - ; Function Attrs: norecurse nounwind uwtable - define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) { -; CHECK-LABEL: @jumble1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>* -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2> -; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: ret void -; 
-entry: - %arrayidx = getelementptr inbounds i32, i32* %A, i64 10 - %0 = load i32, i32* %arrayidx, align 4 - %1 = load i32, i32* %A, align 4 - %mul = mul nsw i32 %0, %1 - %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 11 - %2 = load i32, i32* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1 - %3 = load i32, i32* %arrayidx3, align 4 - %mul4 = mul nsw i32 %2, %3 - %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 12 - %4 = load i32, i32* %arrayidx5, align 4 - %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 3 - %5 = load i32, i32* %arrayidx6, align 4 - %mul7 = mul nsw i32 %4, %5 - %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 13 - %6 = load i32, i32* %arrayidx8, align 4 - %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 2 - %7 = load i32, i32* %arrayidx9, align 4 - %mul10 = mul nsw i32 %6, %7 - store i32 %mul, i32* %B, align 4 - %arrayidx12 = getelementptr inbounds i32, i32* %B, i64 1 - store i32 %mul4, i32* %arrayidx12, align 4 - %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 2 - store i32 %mul7, i32* %arrayidx13, align 4 - %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3 - store i32 %mul10, i32* %arrayidx14, align 4 - ret void - } - -;Reversing the operand of MUL - ; Function Attrs: norecurse nounwind uwtable - define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) { -; CHECK-LABEL: @jumble2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x 
i32>* -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>* -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2> -; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP1]] -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx = getelementptr inbounds i32, i32* %A, i64 10 - %0 = load i32, i32* %arrayidx, align 4 - %1 = load i32, i32* %A, align 4 - %mul = mul nsw i32 %1, %0 - %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 11 - %2 = load i32, i32* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1 - %3 = load i32, i32* %arrayidx3, align 4 - %mul4 = mul nsw i32 %3, %2 - %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 12 - %4 = load i32, i32* %arrayidx5, align 4 - %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 3 - %5 = load i32, i32* %arrayidx6, align 4 - %mul7 = mul nsw i32 %5, %4 - %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 13 - %6 = load i32, i32* %arrayidx8, align 4 - %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 2 - %7 = load i32, i32* %arrayidx9, align 4 - %mul10 = mul nsw i32 %7, %6 - store i32 %mul, i32* %B, align 4 - %arrayidx12 = getelementptr inbounds i32, i32* %B, i64 1 - store i32 %mul4, i32* %arrayidx12, align 4 - %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 2 - store i32 %mul7, i32* %arrayidx13, align 4 
- %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3 - store i32 %mul10, i32* %arrayidx14, align 4 - ret void - } - diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll deleted file mode 100644 index 568fd9f3ac79..000000000000 --- a/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ /dev/null @@ -1,225 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s - -;void phiUsingLoads(int *restrict A, int *restrict B) { -; int tmp0, tmp1, tmp2, tmp3; -; for (int i = 0; i < 100; i++) { -; if (A[0] == 0) { -; tmp0 = A[i + 0]; -; tmp1 = A[i + 1]; -; tmp2 = A[i + 2]; -; tmp3 = A[i + 3]; -; } else if (A[25] == 0) { -; tmp0 = A[i + 0]; -; tmp1 = A[i + 1]; -; tmp2 = A[i + 2]; -; tmp3 = A[i + 3]; -; } else if (A[50] == 0) { -; tmp0 = A[i + 0]; -; tmp1 = A[i + 1]; -; tmp2 = A[i + 2]; -; tmp3 = A[i + 3]; -; } else if (A[75] == 0) { -; tmp0 = A[i + 0]; -; tmp1 = A[i + 1]; -; tmp2 = A[i + 3]; -; tmp3 = A[i + 2]; -; } -; } -; B[0] = tmp0; -; B[1] = tmp1; -; B[2] = tmp2; -; B[3] = tmp3; -;} - - -; Function Attrs: norecurse nounwind uwtable -define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) local_unnamed_addr #0 { -; CHECK-LABEL: @phiUsingLoads( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 25 -; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 50 -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 75 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr 
inbounds i32, i32* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX66:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP27:%.*]], <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP27]], [[FOR_INC]] ] -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4 -; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[CMP13]], label [[IF_THEN14:%.*]], label [[IF_ELSE27:%.*]] -; CHECK: if.then14: -; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]] -; CHECK-NEXT: 
[[TMP11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>* -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else27: -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4 -; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP14]], 0 -; CHECK-NEXT: br i1 [[CMP29]], label [[IF_THEN30:%.*]], label [[IF_ELSE43:%.*]] -; CHECK: if.then30: -; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>* -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else43: -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4 -; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP20]], 0 -; CHECK-NEXT: br i1 [[CMP45]], label [[IF_THEN46:%.*]], label [[FOR_INC]] -; CHECK: if.then46: -; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = 
add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>* -; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2> -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[TMP27]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[TMP26]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; -entry: - %0 = load i32, i32* %A, align 4 - %cmp1 = icmp eq i32 %0, 0 - %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 25 - %arrayidx28 = getelementptr inbounds i32, i32* %A, i64 50 - %arrayidx44 = getelementptr inbounds i32, i32* %A, i64 75 - br label %for.body - -for.cond.cleanup: ; preds = %for.inc - store i32 %tmp0.1, i32* %B, align 4 - %arrayidx64 = getelementptr inbounds i32, i32* %B, i64 1 - store i32 %tmp1.1, i32* %arrayidx64, align 4 - %arrayidx65 = getelementptr inbounds i32, i32* %B, i64 2 - store i32 %tmp2.1, i32* %arrayidx65, align 4 - %arrayidx66 = getelementptr inbounds i32, i32* %B, i64 3 - store i32 %tmp3.1, i32* %arrayidx66, align 4 - ret void - -for.body: ; preds = %for.inc, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] - %tmp3.0111 = phi i32 [ undef, %entry ], [ %tmp3.1, %for.inc ] - %tmp2.0110 = phi i32 [ undef, %entry ], [ %tmp2.1, %for.inc ] - %tmp1.0109 = phi i32 [ undef, %entry ], [ %tmp1.1, %for.inc ] - %tmp0.0108 = phi i32 [ undef, %entry ], [ %tmp0.1, %for.inc ] - br i1 %cmp1, label %if.then, label %if.else - -if.then: ; preds = %for.body - 
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 - %2 = add nuw nsw i64 %indvars.iv, 1 - %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %2 - %3 = load i32, i32* %arrayidx5, align 4 - %4 = add nuw nsw i64 %indvars.iv, 2 - %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %4 - %5 = load i32, i32* %arrayidx8, align 4 - %6 = add nuw nsw i64 %indvars.iv, 3 - %arrayidx11 = getelementptr inbounds i32, i32* %A, i64 %6 - %7 = load i32, i32* %arrayidx11, align 4 - br label %for.inc - -if.else: ; preds = %for.body - %8 = load i32, i32* %arrayidx12, align 4 - %cmp13 = icmp eq i32 %8, 0 - br i1 %cmp13, label %if.then14, label %if.else27 - -if.then14: ; preds = %if.else - %arrayidx17 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %9 = load i32, i32* %arrayidx17, align 4 - %10 = add nuw nsw i64 %indvars.iv, 1 - %arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %10 - %11 = load i32, i32* %arrayidx20, align 4 - %12 = add nuw nsw i64 %indvars.iv, 2 - %arrayidx23 = getelementptr inbounds i32, i32* %A, i64 %12 - %13 = load i32, i32* %arrayidx23, align 4 - %14 = add nuw nsw i64 %indvars.iv, 3 - %arrayidx26 = getelementptr inbounds i32, i32* %A, i64 %14 - %15 = load i32, i32* %arrayidx26, align 4 - br label %for.inc - -if.else27: ; preds = %if.else - %16 = load i32, i32* %arrayidx28, align 4 - %cmp29 = icmp eq i32 %16, 0 - br i1 %cmp29, label %if.then30, label %if.else43 - -if.then30: ; preds = %if.else27 - %arrayidx33 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %17 = load i32, i32* %arrayidx33, align 4 - %18 = add nuw nsw i64 %indvars.iv, 1 - %arrayidx36 = getelementptr inbounds i32, i32* %A, i64 %18 - %19 = load i32, i32* %arrayidx36, align 4 - %20 = add nuw nsw i64 %indvars.iv, 2 - %arrayidx39 = getelementptr inbounds i32, i32* %A, i64 %20 - %21 = load i32, i32* %arrayidx39, align 4 - %22 = add nuw nsw i64 %indvars.iv, 3 - %arrayidx42 = getelementptr inbounds i32, i32* %A, i64 %22 - %23 
= load i32, i32* %arrayidx42, align 4 - br label %for.inc - -if.else43: ; preds = %if.else27 - %24 = load i32, i32* %arrayidx44, align 4 - %cmp45 = icmp eq i32 %24, 0 - br i1 %cmp45, label %if.then46, label %for.inc - -if.then46: ; preds = %if.else43 - %arrayidx49 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %25 = load i32, i32* %arrayidx49, align 4 - %26 = add nuw nsw i64 %indvars.iv, 1 - %arrayidx52 = getelementptr inbounds i32, i32* %A, i64 %26 - %27 = load i32, i32* %arrayidx52, align 4 - %28 = add nuw nsw i64 %indvars.iv, 3 - %arrayidx55 = getelementptr inbounds i32, i32* %A, i64 %28 - %29 = load i32, i32* %arrayidx55, align 4 - %30 = add nuw nsw i64 %indvars.iv, 2 - %arrayidx58 = getelementptr inbounds i32, i32* %A, i64 %30 - %31 = load i32, i32* %arrayidx58, align 4 - br label %for.inc - -for.inc: ; preds = %if.then, %if.then30, %if.else43, %if.then46, %if.then14 - %tmp0.1 = phi i32 [ %1, %if.then ], [ %9, %if.then14 ], [ %17, %if.then30 ], [ %25, %if.then46 ], [ %tmp0.0108, %if.else43 ] - %tmp1.1 = phi i32 [ %3, %if.then ], [ %11, %if.then14 ], [ %19, %if.then30 ], [ %27, %if.then46 ], [ %tmp1.0109, %if.else43 ] - %tmp2.1 = phi i32 [ %5, %if.then ], [ %13, %if.then14 ], [ %21, %if.then30 ], [ %29, %if.then46 ], [ %tmp2.0110, %if.else43 ] - %tmp3.1 = phi i32 [ %7, %if.then ], [ %15, %if.then14 ], [ %23, %if.then30 ], [ %31, %if.then46 ], [ %tmp3.0111, %if.else43 ] - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 100 - br i1 %exitcond, label %for.cond.cleanup, label %for.body -} diff --git a/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/test/Transforms/SLPVectorizer/X86/jumbled-load.ll index be58521ed898..06e051a90b0d 100644 --- a/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -5,27 +5,34 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) { ; CHECK-LABEL: @jumbled-load( -; 
CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0 +; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* %in, i64 0 +; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, i32* [[IN_ADDR]], align 4 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3 +; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, i32* [[GEP_1]], align 4 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1 +; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0 +; CHECK-NEXT: [[LOAD_4:%.*]] = load i32, i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* %inn, i64 0 +; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, i32* [[INN_ADDR]], align 4 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2 +; CHECK-NEXT: [[LOAD_6:%.*]] = load i32, i32* [[GEP_4]], align 4 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3 +; CHECK-NEXT: [[LOAD_7:%.*]] = load i32, i32* [[GEP_5]], align 4 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2> -; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 -; CHECK-NEXT: [[GEP_8:%.*]] = 
getelementptr inbounds i32, i32* [[OUT]], i64 1 -; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 -; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[LOAD_8:%.*]] = load i32, i32* [[GEP_6]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[LOAD_3]], [[LOAD_5]] +; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[LOAD_2]], [[LOAD_8]] +; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[LOAD_4]], [[LOAD_7]] +; CHECK-NEXT: [[MUL_4:%.*]] = mul i32 [[LOAD_1]], [[LOAD_6]] +; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* %out, i64 0 +; CHECK-NEXT: store i32 [[MUL_1]], i32* [[GEP_7]], align 4 +; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* %out, i64 1 +; CHECK-NEXT: store i32 [[MUL_2]], i32* [[GEP_8]], align 4 +; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* %out, i64 2 +; CHECK-NEXT: store i32 [[MUL_3]], i32* [[GEP_9]], align 4 +; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* %out, i64 3 +; CHECK-NEXT: store i32 [[MUL_4]], i32* [[GEP_10]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/test/Transforms/SLPVectorizer/X86/store-jumbled.ll index 6ae763520013..1b2c76384e0b 100644 --- a/test/Transforms/SLPVectorizer/X86/store-jumbled.ll +++ b/test/Transforms/SLPVectorizer/X86/store-jumbled.ll @@ -6,26 +6,33 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) { ; CHECK-LABEL: @jumbled-load( ; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0 +; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, i32* [[IN_ADDR]], align 4 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1 +; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, 
i32* [[GEP_1]], align 4 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 +; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2> +; CHECK-NEXT: [[LOAD_4:%.*]] = load i32, i32* [[GEP_3]], align 4 ; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0 +; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, i32* [[INN_ADDR]], align 4 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1 +; CHECK-NEXT: [[LOAD_6:%.*]] = load i32, i32* [[GEP_4]], align 4 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2 +; CHECK-NEXT: [[LOAD_7:%.*]] = load i32, i32* [[GEP_5]], align 4 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2> -; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[LOAD_8:%.*]] = load i32, i32* [[GEP_6]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[LOAD_1]], [[LOAD_5]] +; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[LOAD_2]], [[LOAD_6]] +; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[LOAD_3]], [[LOAD_7]] +; CHECK-NEXT: [[MUL_4:%.*]] = mul i32 [[LOAD_4]], [[LOAD_8]] ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: store i32 [[MUL_1]], i32* [[GEP_9]], align 4 +; CHECK-NEXT: store i32 [[MUL_2]], i32* [[GEP_7]], align 4 +; CHECK-NEXT: store i32 [[MUL_3]], i32* [[GEP_10]], align 4 +; CHECK-NEXT: store i32 [[MUL_4]], i32* [[GEP_8]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/test/Transforms/SampleProfile/entry_counts.ll b/test/Transforms/SampleProfile/entry_counts.ll index 6137a6908cf5..cab7c87e0493 100644 --- a/test/Transforms/SampleProfile/entry_counts.ll +++ b/test/Transforms/SampleProfile/entry_counts.ll @@ -9,8 +9,8 @@ entry: ret void, !dbg !9 } -; This function does not have profile, check if function_entry_count is 0 -; CHECK: {{.*}} = !{!"function_entry_count", i64 0} +; This function does not have profile, check if function_entry_count is -1 +; CHECK: {{.*}} = !{!"function_entry_count", i64 -1} define void @no_profile() { entry: ret void diff --git a/test/Transforms/SimplifyCFG/X86/if-conversion.ll b/test/Transforms/SimplifyCFG/X86/if-conversion.ll new file mode 100644 index 000000000000..28702572d480 --- /dev/null +++ b/test/Transforms/SimplifyCFG/X86/if-conversion.ll @@ -0,0 +1,231 @@ +; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s +; Avoid if-conversion if there is a long dependence chain. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; The first several cases test FindLongDependenceChain returns true, so +; if-conversion is blocked. 
+ +define i64 @test1(i64** %pp, i64* %p) { +entry: + %0 = load i64*, i64** %pp, align 8 + %1 = load i64, i64* %0, align 8 + %cmp = icmp slt i64 %1, 0 + %pint = ptrtoint i64* %p to i64 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %p1 = add i64 %pint, 8 + br label %cond.end + +cond.false: + %p2 = or i64 %pint, 16 + br label %cond.end + +cond.end: + %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] + %ptr = inttoptr i64 %p3 to i64* + %val = load i64, i64* %ptr, align 8 + ret i64 %val + +; CHECK-NOT: select +} + +define i64 @test2(i64** %pp, i64* %p) { +entry: + %0 = load i64*, i64** %pp, align 8 + %1 = load i64, i64* %0, align 8 + %cmp = icmp slt i64 %1, 0 + %pint = ptrtoint i64* %p to i64 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %p1 = add i64 %pint, 8 + br label %cond.end + +cond.false: + %p2 = add i64 %pint, 16 + br label %cond.end + +cond.end: + %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] + %ptr = inttoptr i64 %p3 to i64* + %val = load i64, i64* %ptr, align 8 + ret i64 %val + +; CHECK-LABEL: @test2 +; CHECK-NOT: select +} + +; The following cases test FindLongDependenceChain returns false, so +; if-conversion will proceed. + +; Non trivial LatencyAdjustment. +define i64 @test3(i64** %pp, i64* %p) { +entry: + %0 = load i64*, i64** %pp, align 8 + %1 = load i64, i64* %0, align 8 + %cmp = icmp slt i64 %1, 0 + %pint = ptrtoint i64* %p to i64 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %p1 = add i64 %pint, 8 + br label %cond.end + +cond.false: + %p2 = or i64 %pint, 16 + br label %cond.end + +cond.end: + %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] + %p4 = add i64 %p3, %1 + %ptr = inttoptr i64 %p4 to i64* + %val = load i64, i64* %ptr, align 8 + ret i64 %val + +; CHECK-LABEL: @test3 +; CHECK: select +} + +; Short dependence chain. 
+define i64 @test4(i64* %pp, i64* %p) { +entry: + %0 = load i64, i64* %pp, align 8 + %cmp = icmp slt i64 %0, 0 + %pint = ptrtoint i64* %p to i64 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %p1 = add i64 %pint, 8 + br label %cond.end + +cond.false: + %p2 = or i64 %pint, 16 + br label %cond.end + +cond.end: + %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] + %ptr = inttoptr i64 %p3 to i64* + %val = load i64, i64* %ptr, align 8 + ret i64 %val + +; CHECK-LABEL: @test4 +; CHECK: select +} + +; High IPC. +define i64 @test5(i64** %pp, i64* %p) { +entry: + %0 = load i64*, i64** %pp, align 8 + %1 = load i64, i64* %0, align 8 + %cmp = icmp slt i64 %1, 0 + %pint = ptrtoint i64* %p to i64 + %2 = add i64 %pint, 2 + %3 = add i64 %pint, 3 + %4 = or i64 %pint, 16 + %5 = and i64 %pint, 255 + + %6 = or i64 %2, 9 + %7 = and i64 %3, 255 + %8 = add i64 %4, 4 + %9 = add i64 %5, 5 + + %10 = add i64 %6, 2 + %11 = add i64 %7, 3 + %12 = add i64 %8, 4 + %13 = add i64 %9, 5 + + %14 = add i64 %10, 6 + %15 = add i64 %11, 7 + %16 = add i64 %12, 8 + %17 = add i64 %13, 9 + + %18 = add i64 %14, 10 + %19 = add i64 %15, 11 + %20 = add i64 %16, 12 + %21 = add i64 %17, 13 + + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %p1 = add i64 %pint, 8 + br label %cond.end + +cond.false: + %p2 = or i64 %pint, 16 + br label %cond.end + +cond.end: + %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] + %ptr = inttoptr i64 %p3 to i64* + %val = load i64, i64* %ptr, align 8 + + ret i64 %val + +; CHECK-LABEL: @test5 +; CHECK: select +} + +; Large BB size. 
+define i64 @test6(i64** %pp, i64* %p) { +entry: + %0 = load i64*, i64** %pp, align 8 + %1 = load i64, i64* %0, align 8 + %cmp = icmp slt i64 %1, 0 + %pint = ptrtoint i64* %p to i64 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + %p1 = add i64 %pint, 8 + br label %cond.end + +cond.false: + %p2 = or i64 %pint, 16 + br label %cond.end + +cond.end: + %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] + %ptr = inttoptr i64 %p3 to i64* + %val = load i64, i64* %ptr, align 8 + %2 = add i64 %pint, 2 + %3 = add i64 %pint, 3 + %4 = add i64 %2, 4 + %5 = add i64 %3, 5 + %6 = add i64 %4, 6 + %7 = add i64 %5, 7 + %8 = add i64 %6, 6 + %9 = add i64 %7, 7 + %10 = add i64 %8, 6 + %11 = add i64 %9, 7 + %12 = add i64 %10, 6 + %13 = add i64 %11, 7 + %14 = add i64 %12, 6 + %15 = add i64 %13, 7 + %16 = add i64 %14, 6 + %17 = add i64 %15, 7 + %18 = add i64 %16, 6 + %19 = add i64 %17, 7 + %20 = add i64 %18, 6 + %21 = add i64 %19, 7 + %22 = add i64 %20, 6 + %23 = add i64 %21, 7 + %24 = add i64 %22, 6 + %25 = add i64 %23, 7 + %26 = add i64 %24, 6 + %27 = add i64 %25, 7 + %28 = add i64 %26, 6 + %29 = add i64 %27, 7 + %30 = add i64 %28, 6 + %31 = add i64 %29, 7 + %32 = add i64 %30, 8 + %33 = add i64 %31, 9 + %34 = add i64 %32, %33 + %35 = and i64 %34, 255 + %res = add i64 %val, %35 + + ret i64 %res + +; CHECK-LABEL: @test6 +; CHECK: select +} |
