Diffstat (limited to 'test/Transforms')
34 files changed, 1794 insertions, 137 deletions
diff --git a/test/Transforms/CodeExtractor/live_shrink.ll b/test/Transforms/CodeExtractor/live_shrink.ll new file mode 100644 index 0000000000000..c25ed2b622cdc --- /dev/null +++ b/test/Transforms/CodeExtractor/live_shrink.ll @@ -0,0 +1,67 @@ +; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s +; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s + +%class.A = type { i32 } +@cond = local_unnamed_addr global i32 0, align 4 + +; Function Attrs: uwtable +define void @_Z3foov() local_unnamed_addr { +bb: + %tmp = alloca %class.A, align 4 + %tmp1 = bitcast %class.A* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) + %tmp2 = load i32, i32* @cond, align 4, !tbaa !2 + %tmp3 = icmp eq i32 %tmp2, 0 + br i1 %tmp3, label %bb4, label %bb5 + +bb4: ; preds = %bb + call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp) + br label %bb5 + +bb5: ; preds = %bb4, %bb + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +; Function Attrs: uwtable +define void @_Z3goov() local_unnamed_addr { +; CHECK-LABEL: @_Z3goov() +bb: +; CHECK: bb: +; CHECK-NOT: alloca +; CHECK-NOT: bitcast +; CHECK-NOT: llvm.lifetime +; CHECK: br i1 +; CHECK: codeRepl.i: +; CHECK: call void @_Z3foov.1_ + + tail call void @_Z3foov() + ret void +} + +; CHECK-LABEL: define internal void @_Z3foov.1_ +; CHECK: newFuncRoot: +; CHECK-NEXT: %tmp = alloca %class.A +; CHECK-NEXT: %tmp1 = bitcast %class.A* %tmp to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) +; CHECK: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) +; CHECK-NEXT: br label %bb5.exitStub + + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} diff --git a/test/Transforms/CodeExtractor/live_shrink_gep.ll b/test/Transforms/CodeExtractor/live_shrink_gep.ll new file mode 100644 index 0000000000000..ac6aa4fbda43b --- /dev/null +++ b/test/Transforms/CodeExtractor/live_shrink_gep.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s +; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s + +%class.A = type { i8 } + +@cond = local_unnamed_addr global i32 0, align 4 + +; Function Attrs: uwtable +define void @_Z3foov() local_unnamed_addr { +bb: + %tmp = alloca %class.A, align 1 + %tmp1 = getelementptr inbounds %class.A, %class.A* %tmp, i64 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %tmp1) + %tmp2 = load i32, i32* @cond, align 4, !tbaa !2 + %tmp3 = icmp eq i32 %tmp2, 0 + br i1 %tmp3, label %bb4, label %bb5 + +bb4: ; preds = %bb + call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp) + br label %bb5 + +bb5: ; preds = %bb4, %bb + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %tmp1) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) 
+ +; Function Attrs: uwtable +define void @_Z3goov() local_unnamed_addr { +; CHECK-LABEL: @_Z3goov() +bb: +; CHECK: bb: +; CHECK-NOT: alloca +; CHECK-NOT: getelementptr +; CHECK-NOT: llvm.lifetime +; CHECK: br i1 +; CHECK: codeRepl.i: +; CHECK: call void @_Z3foov.1_ + tail call void @_Z3foov() + ret void +} + +; CHECK-LABEL: define internal void @_Z3foov.1_ +; CHECK: newFuncRoot: +; CHECK-NEXT: %tmp = alloca %class.A +; CHECK-NEXT: %tmp1 = getelementptr +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8 +; CHECK: call void @llvm.lifetime.end.p0i8 +; CHECK-NEXT: br label %bb5.exitStub + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} diff --git a/test/Transforms/CodeExtractor/live_shrink_hoist.ll b/test/Transforms/CodeExtractor/live_shrink_hoist.ll new file mode 100644 index 0000000000000..d1b310f017694 --- /dev/null +++ b/test/Transforms/CodeExtractor/live_shrink_hoist.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s +; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s + +%class.A = type { i32 } + +@cond = local_unnamed_addr global i32 0, align 4 + +; Function Attrs: uwtable +define void @_Z3foov() local_unnamed_addr { +bb: + %tmp = alloca %class.A, align 4 + %tmp1 = bitcast %class.A* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) + %tmp2 = load i32, i32* @cond, align 4, !tbaa !2 + %tmp3 = icmp eq i32 %tmp2, 0 + br i1 %tmp3, label %bb4, label %bb9 + +bb4: ; preds = %bb + call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp) + %tmp5 = getelementptr inbounds %class.A, %class.A* %tmp, i64 0, i32 0 + %tmp6 = load i32, i32* %tmp5, align 4, !tbaa !6 + %tmp7 = icmp sgt i32 %tmp6, 0 + br i1 %tmp7, label %bb9, label %bb8 + +bb8: ; preds = %bb4 + call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp) + br label %bb9 + +bb9: ; preds = %bb8, %bb4, %bb + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +; Function Attrs: uwtable +define void @_Z3goov() local_unnamed_addr { +bb: + tail call void @_Z3foov() + ret void +} + +; CHECK-LABEL: define internal void @_Z3foov.1_ +; CHECK: bb9: +; CHECK: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) +; CHECK: br label %.exitStub + + + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} +!6 = !{!7, !3, i64 0} +!7 = !{!"_ZTS1A", !3, i64 0} diff --git a/test/Transforms/CodeExtractor/live_shrink_multiple.ll b/test/Transforms/CodeExtractor/live_shrink_multiple.ll new file mode 100644 index 0000000000000..8d9045c7267b1 --- /dev/null +++ b/test/Transforms/CodeExtractor/live_shrink_multiple.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s +; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s + +%class.A = 
type { i32 }
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = alloca %class.A, align 4
+  %tmp2 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2)
+  %tmp3 = bitcast %class.A* %tmp1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp3)
+  %tmp4 = load i32, i32* @cond, align 4, !tbaa !2
+  %tmp5 = icmp eq i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp3)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr {
+bb:
+  tail call void @_Z3foov()
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1_
+; CHECK: newFuncRoot:
+; CHECK-NEXT: alloca
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
+; CHECK-NEXT: alloca
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
+; CHECK: call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT: br label {{.*}}exitStub
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
diff --git a/test/Transforms/CodeExtractor/live_shrink_unsafe.ll b/test/Transforms/CodeExtractor/live_shrink_unsafe.ll
new file mode 100644
index 0000000000000..ea6458cc46ec8
--- /dev/null
+++ b/test/Transforms/CodeExtractor/live_shrink_unsafe.ll
@@ -0,0 +1,94 @@
+; The behavior checked by this test is expected to change once the
+; partial-inlining legality check is enhanced.
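+;
+; In both functions below the allocas and lifetime markers are expected to
+; stay in place rather than be sunk into the extracted function: in one case
+; the branch condition is loaded through an unknown pointer, and in the other
+; an unknown call is made before the branch.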
+
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+%class.A = type { i32 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+@condptr = external local_unnamed_addr global i32*, align 8
+
+; Function Attrs: uwtable
+define void @_Z3foo_unknown_mem_accessv() local_unnamed_addr {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = alloca %class.A, align 4
+  %tmp2 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2)
+  %tmp3 = bitcast %class.A* %tmp1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp3)
+  %tmp4 = load i32*, i32** @condptr, align 8, !tbaa !2
+  %tmp5 = load i32, i32* %tmp4, align 4, !tbaa !6
+  %tmp6 = icmp eq i32 %tmp5, 0
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp3)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2)
+  ret void
+}
+
+declare void @_Z3barv() local_unnamed_addr
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @_Z3foo_unknown_calli(i32 %arg) local_unnamed_addr {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+  tail call void @_Z3barv()
+  %tmp2 = icmp eq i32 %arg, 0
+  br i1 %tmp2, label %bb3, label %bb4
+
+bb3:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb4
+
+bb4:                                              ; preds = %bb3, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+  ret void
+}
+
+define void @_Z3goov() local_unnamed_addr {
+; CHECK-LABEL: @_Z3goov
+; CHECK-NEXT: bb:
+; CHECK: alloca
+; CHECK: lifetime
+bb:
+  call void @_Z3foo_unknown_mem_accessv()
+  %tmp = load i32, i32* @cond, align 4, !tbaa !2
+  tail call void @_Z3foo_unknown_calli(i32 %tmp)
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foo_unknown_calli.1_bb3
+; CHECK: newFuncRoot:
+; CHECK-NEXT: br label %bb3
+
+; CHECK: bb4.exitStub:
+; CHECK-NEXT: ret void
+
+; CHECK: bb3:
+; CHECK-NOT: lifetime.end
+; CHECK: br label %bb4.exitStub
+
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !4, i64 0}
diff --git a/test/Transforms/CrossDSOCFI/cfi_functions.ll b/test/Transforms/CrossDSOCFI/cfi_functions.ll
new file mode 100644
index 0000000000000..ccbde51b2115c
--- /dev/null
+++ b/test/Transforms/CrossDSOCFI/cfi_functions.ll
@@ -0,0 +1,23 @@
+; Test that types referenced in ThinLTO-style !cfi.functions are known to __cfi_check.
+; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+; RUN: opt -S -passes=cross-dso-cfi < %s | FileCheck %s
+
+; CHECK: define void @__cfi_check(
+; CHECK: switch i64
+; CHECK-NEXT: i64 1234, label
+; CHECK-NEXT: i64 5678, label
+; CHECK-NEXT: ]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!cfi.functions = !{!0, !1}
+!llvm.module.flags = !{!6}
+
+!0 = !{!"f", i8 0, !2, !4}
+!1 = !{!"g", i8 1, !3, !5}
+!2 = !{i64 0, !"typeid1"}
+!3 = !{i64 0, !"typeid2"}
+!4 = !{i64 0, i64 1234}
+!5 = !{i64 0, i64 5678}
+!6 = !{i32 4, !"Cross-DSO CFI", i32 1}
diff --git a/test/Transforms/EarlyCSE/pr33406.ll b/test/Transforms/EarlyCSE/pr33406.ll
new file mode 100644
index 0000000000000..4d3312e1f0ac2
--- /dev/null
+++ b/test/Transforms/EarlyCSE/pr33406.ll
@@ -0,0 +1,26 @@
+; RUN: opt -early-cse-memssa -S %s | FileCheck %s
+
+; CHECK: define void @patatino() {
+; CHECK: for.cond:
+; CHECK-NEXT: br i1 true, label %if.end, label %for.inc
+; CHECK: if.end:
+; CHECK-NEXT: %tinkywinky = load i32, i32* @b
+; CHECK-NEXT: br i1 true, label %for.inc, label %for.inc
+; CHECK: for.inc:
+; CHECK-NEXT: ret void
+
+
+@b = external global i32
+
+define void @patatino() {
+for.cond:
+  br i1 true, label %if.end, label %for.inc
+
+if.end:
+  %tinkywinky = load i32, i32* @b
+  store i32 %tinkywinky, i32* @b
+  br i1 true, label %for.inc, label %for.inc
+
+for.inc:
+  ret void
+}
diff --git a/test/Transforms/GVN/pr32314.ll b/test/Transforms/GVN/pr32314.ll
new file mode 100644
index 0000000000000..90d14f6fc49c0
--- /dev/null
+++ b/test/Transforms/GVN/pr32314.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -gvn < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The load in the loop cannot bypass the data stored in the previous iteration: the store above it in the loop body aliases it.
+define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[P_017:%.*]] = phi i32* [ undef, [[ENTRY]] ], [ [[ARRAYIDX3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[A]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: store i32 50, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[P_017]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[ARRAYIDX3]] = getelementptr inbounds [3 x i32], [3 x i32]* [[A]], i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 60, i32* [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 3 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; +entry: + %a = alloca [3 x i32], align 4 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] + %p.017 = phi i32* [ undef, %entry ], [ %arrayidx3, %for.body ] + %0 = add nsw i64 %indvars.iv, -1 + %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* %a, i64 0, i64 %0 + store i32 50, i32* %arrayidx, align 4 + %1 = shl i64 %indvars.iv, 1 + %2 = load i32, i32* %p.017, align 4 + %3 = trunc i64 %1 to i32 + %add1 = add nsw i32 %2, %3 + %arrayidx3 = getelementptr inbounds [3 x i32], [3 x i32]* %a, i64 0, i64 %indvars.iv + store i32 60, i32* %arrayidx3, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 3 + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} diff --git a/test/Transforms/GlobalMerge/debug-info.ll b/test/Transforms/GlobalMerge/debug-info.ll index 97e0bb2148e93..8d60f3662431c 100644 --- a/test/Transforms/GlobalMerge/debug-info.ll +++ b/test/Transforms/GlobalMerge/debug-info.ll @@ -17,7 +17,7 @@ define void @use1() { ; CHECK: [[AVAR]] = !DIGlobalVariable(name: "a", scope: null, isLocal: false, isDefinition: true) ; CHECK: [[B]] = !DIGlobalVariableExpression(var: [[BVAR:![0-9]+]], expr: [[EXPR:![0-9]+]]) ; CHECK: [[BVAR]] = !DIGlobalVariable(name: "b", scope: null, isLocal: false, isDefinition: true) -; CHECK: [[EXPR]] = !DIExpression(DW_OP_plus, 4) +; CHECK: [[EXPR]] = !DIExpression(DW_OP_plus_uconst, 4) !llvm.module.flags = !{!4, !5} diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll index 5366b5a16cc77..791eb94779b70 100644 --- a/test/Transforms/Inline/always-inline.ll +++ b/test/Transforms/Inline/always-inline.ll @@ -305,3 +305,14 @@ entry: ret void ; CHECK: ret void } + +define void @inner14() readnone nounwind { +; CHECK: define void @inner14 + ret void +} + +define void @outer14() { +; CHECK: call void @inner14 + call void @inner14() + ret void +} diff --git a/test/Transforms/InstCombine/debuginfo-dce.ll b/test/Transforms/InstCombine/debuginfo-dce.ll index 086743e80820b..50b8f1c6068e1 100644 --- a/test/Transforms/InstCombine/debuginfo-dce.ll +++ 
b/test/Transforms/InstCombine/debuginfo-dce.ll @@ -93,12 +93,12 @@ entry: ret void, !dbg !32 } -; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus, 0) -; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus, 0) -; CHECK: ![[GEP0_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_plus, 0, DW_OP_stack_value) -; CHECK: ![[GEP1_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_stack_value, +; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 0) +; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus_uconst, 0) +; CHECK: ![[GEP0_EXPR]] = !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_plus_uconst, 0, DW_OP_stack_value) +; CHECK: ![[GEP1_EXPR]] = !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value, ; CHECK-SAME: DW_OP_LLVM_fragment, 0, 32) -; CHECK: ![[GEP2_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_stack_value) +; CHECK: ![[GEP2_EXPR]] = !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value) ; Function Attrs: nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 @@ -130,7 +130,7 @@ attributes #1 = { nounwind readnone } !17 = !{!18} !18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4) !19 = !DILocation(line: 6, column: 17, scope: !14) -!20 = !DIExpression(DW_OP_plus, 0) +!20 = !DIExpression(DW_OP_plus_uconst, 0) !21 = !DILocation(line: 11, column: 1, scope: !14) !22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17) !23 = !DILocation(line: 6, column: 17, scope: !22) diff --git a/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll b/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll index 107440f10a5a2..230ac1796671f 100644 --- a/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll +++ b/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll @@ -1,10 +1,11 @@ ; RUN: opt -instcombine -unfold-element-atomic-memcpy-max-elements=8 -S < %s | FileCheck %s +; Temporarily an expected failure until inst combine is updated in the next patch target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; Test basic unfolding -define void @test1(i8* %Src, i8* %Dst) { -; CHECK-LABEL: test1 -; CHECK-NOT: llvm.memcpy.element.atomic +; Test basic unfolding -- unordered load & store +define void @test1a(i8* %Src, i8* %Dst) { +; CHECK-LABEL: test1a +; CHECK-NOT: llvm.memcpy.element.unordered.atomic ; CHECK-DAG: %memcpy_unfold.src_casted = bitcast i8* %Src to i32* ; CHECK-DAG: %memcpy_unfold.dst_casted = bitcast i8* %Dst to i32* @@ -21,7 +22,7 @@ define void @test1(i8* %Src, i8* %Dst) { ; CHECK-DAG: [[VAL4:%[^\s]+]] = load atomic i32, i32* %{{[^\s]+}} unordered, align 4 ; CHECK-DAG: store atomic i32 [[VAL4]], i32* %{{[^\s]+}} unordered, align 4 entry: - call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 8 %Src, i64 4, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %Dst, i8* align 4 %Src, i32 16, i32 4) ret void } @@ -31,9 +32,9 @@ define void @test2(i8* %Src, i8* %Dst) { ; CHECK-NOT: load ; CHECK-NOT: store -; CHECK: llvm.memcpy.element.atomic +; CHECK: llvm.memcpy.element.unordered.atomic entry: - call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 4 %Src, i64 1000, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %Dst, i8* align 4 %Src, i32 256, i32 4) ret void } @@ -43,16 +44,16 @@ define void @test3(i8* %Src, i8* 
%Dst) { ; CHECK-NOT: load ; CHECK-NOT: store -; CHECK: llvm.memcpy.element.atomic +; CHECK: llvm.memcpy.element.unordered.atomic entry: - call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 4, i32 64) + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 64 %Dst, i8* align 64 %Src, i32 64, i32 64) ret void } ; Test that we will eliminate redundant bitcasts define void @test4(i64* %Src, i64* %Dst) { ; CHECK-LABEL: test4 -; CHECK-NOT: llvm.memcpy.element.atomic +; CHECK-NOT: llvm.memcpy.element.unordered.atomic ; CHECK-NOT: bitcast @@ -76,17 +77,18 @@ define void @test4(i64* %Src, i64* %Dst) { entry: %Src.casted = bitcast i64* %Src to i8* %Dst.casted = bitcast i64* %Dst to i8* - call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dst.casted, i8* align 16 %Src.casted, i64 4, i32 8) + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %Dst.casted, i8* align 16 %Src.casted, i32 32, i32 8) ret void } +; Test that 0-length unordered atomic memcpy gets removed. define void @test5(i8* %Src, i8* %Dst) { ; CHECK-LABEL: test5 -; CHECK-NOT: llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 0, i32 64) +; CHECK-NOT: llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 64 %Dst, i8* align 64 %Src, i32 0, i32 8) entry: - call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 0, i32 64) + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 64 %Dst, i8* align 64 %Src, i32 0, i32 8) ret void } -declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* nocapture, i8* nocapture, i64, i32) +declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll index d27fb5d89f092..af4ee85216ef2 100644 --- a/test/Transforms/InstCombine/ffs-1.ll +++ b/test/Transforms/InstCombine/ffs-1.ll @@ -1,12 +1,12 @@ ; Test that the ffs* library call simplifier works correctly. 
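; (ffs(x) returns one plus the index of the least significant set bit of x,
; or 0 when x is 0; ffsl/ffsll are the long and long long variants.)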
; -; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-FFS -; RUN: opt -instcombine -mtriple=arm64-apple-ios9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s -; RUN: opt -instcombine -mtriple=arm64-apple-tvos9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s -; RUN: opt -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s -; RUN: opt -instcombine -mtriple=x86_64-apple-macosx10.11 -S %s | FileCheck --check-prefix=CHECK-FFS %s -; RUN: opt -instcombine -mtriple=x86_64-freebsd-gnu -S %s | FileCheck --check-prefix=CHECK-FFS %s +; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC +; RUN: opt < %s -instcombine -mtriple i386-pc-linux -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET +; RUN: opt < %s -instcombine -mtriple=arm64-apple-ios9.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET +; RUN: opt < %s -instcombine -mtriple=arm64-apple-tvos9.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET +; RUN: opt < %s -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET +; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx10.11 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET +; RUN: opt < %s -instcombine -mtriple=x86_64-freebsd-gnu -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET declare i32 @ffs(i32) declare i32 @ffsl(i32) @@ -15,123 +15,179 @@ declare i32 @ffsll(i64) ; Check ffs(0) -> 0. define i32 @test_simplify1() { -; CHECK-LABEL: @test_simplify1( +; ALL-LABEL: @test_simplify1( +; ALL-NEXT: ret i32 0 +; %ret = call i32 @ffs(i32 0) ret i32 %ret -; CHECK-NEXT: ret i32 0 } define i32 @test_simplify2() { -; CHECK-FFS-LABEL: @test_simplify2( +; GENERIC-LABEL: @test_simplify2( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 0) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify2( +; TARGET-NEXT: ret i32 0 +; %ret = call i32 @ffsl(i32 0) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 0 } define i32 @test_simplify3() { -; CHECK-FFS-LABEL: @test_simplify3( +; GENERIC-LABEL: @test_simplify3( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 0) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify3( +; TARGET-NEXT: ret i32 0 +; %ret = call i32 @ffsll(i64 0) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 0 } ; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant. 
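; For example, 2048 == 1 << 11, so cttz(2048) == 11 and ffs(2048) == 12,
; as @test_simplify5 below checks.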
define i32 @test_simplify4() { -; CHECK-LABEL: @test_simplify4( +; ALL-LABEL: @test_simplify4( +; ALL-NEXT: ret i32 1 +; %ret = call i32 @ffs(i32 1) ret i32 %ret -; CHECK-NEXT: ret i32 1 } define i32 @test_simplify5() { -; CHECK-LABEL: @test_simplify5( +; ALL-LABEL: @test_simplify5( +; ALL-NEXT: ret i32 12 +; %ret = call i32 @ffs(i32 2048) ret i32 %ret -; CHECK-NEXT: ret i32 12 } define i32 @test_simplify6() { -; CHECK-LABEL: @test_simplify6( +; ALL-LABEL: @test_simplify6( +; ALL-NEXT: ret i32 17 +; %ret = call i32 @ffs(i32 65536) ret i32 %ret -; CHECK-NEXT: ret i32 17 } define i32 @test_simplify7() { -; CHECK-FFS-LABEL: @test_simplify7( +; GENERIC-LABEL: @test_simplify7( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 65536) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify7( +; TARGET-NEXT: ret i32 17 +; %ret = call i32 @ffsl(i32 65536) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 17 } define i32 @test_simplify8() { -; CHECK-FFS-LABEL: @test_simplify8( +; GENERIC-LABEL: @test_simplify8( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 1024) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify8( +; TARGET-NEXT: ret i32 11 +; %ret = call i32 @ffsll(i64 1024) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 11 } define i32 @test_simplify9() { -; CHECK-FFS-LABEL: @test_simplify9( +; GENERIC-LABEL: @test_simplify9( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 65536) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify9( +; TARGET-NEXT: ret i32 17 +; %ret = call i32 @ffsll(i64 65536) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 17 } define i32 @test_simplify10() { -; CHECK-FFS-LABEL: @test_simplify10( +; GENERIC-LABEL: @test_simplify10( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 17179869184) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify10( +; TARGET-NEXT: ret i32 35 +; %ret = call i32 @ffsll(i64 17179869184) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 35 } define i32 @test_simplify11() { -; CHECK-FFS-LABEL: @test_simplify11( +; GENERIC-LABEL: @test_simplify11( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 281474976710656) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify11( +; TARGET-NEXT: ret i32 49 +; %ret = call i32 @ffsll(i64 281474976710656) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 49 } define i32 @test_simplify12() { -; CHECK-FFS-LABEL: @test_simplify12( +; GENERIC-LABEL: @test_simplify12( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 1152921504606846976) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify12( +; TARGET-NEXT: ret i32 61 +; %ret = call i32 @ffsll(i64 1152921504606846976) ret i32 %ret -; CHECK-FFS-NEXT: ret i32 61 } ; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0. 
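; The compare-and-select guard is needed because the transform emits
; llvm.cttz with is_zero_undef (the trailing 'i1 true'), whose result is
; undefined for a zero input.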
define i32 @test_simplify13(i32 %x) { -; CHECK-LABEL: @test_simplify13( +; ALL-LABEL: @test_simplify13( +; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) +; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1 +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0 +; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +; ALL-NEXT: ret i32 [[TMP3]] +; %ret = call i32 @ffs(i32 %x) -; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) -; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1 -; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0 -; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0 ret i32 %ret -; CHECK-NEXT: ret i32 [[RET]] } define i32 @test_simplify14(i32 %x) { -; CHECK-FFS-LABEL: @test_simplify14( +; GENERIC-LABEL: @test_simplify14( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 %x) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify14( +; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) +; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1 +; TARGET-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0 +; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +; TARGET-NEXT: ret i32 [[TMP3]] +; %ret = call i32 @ffsl(i32 %x) -; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true) -; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1 -; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0 -; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0 ret i32 %ret -; CHECK-FFS-NEXT: ret i32 [[RET]] } define i32 @test_simplify15(i64 %x) { -; CHECK-FFS-LABEL: @test_simplify15( +; GENERIC-LABEL: @test_simplify15( +; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 %x) +; GENERIC-NEXT: ret i32 [[RET]] +; +; TARGET-LABEL: @test_simplify15( +; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true) +; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1 +; TARGET-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; TARGET-NEXT: [[TMP3:%.*]] = icmp ne i64 %x, 0 +; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 +; TARGET-NEXT: ret i32 [[TMP4]] +; %ret = call i32 @ffsll(i64 %x) -; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true) -; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1 -; CHECK-FFS-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32 -; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0 -; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0 ret i32 %ret -; CHECK-FFS-NEXT: ret i32 [[RET]] } + diff --git a/test/Transforms/InstCombine/lshr.ll b/test/Transforms/InstCombine/lshr.ll index 71b25177162b9..4cdcb98f730c1 100644 --- a/test/Transforms/InstCombine/lshr.ll +++ b/test/Transforms/InstCombine/lshr.ll @@ -122,10 +122,19 @@ define <2 x i8> @bool_zext_splat(<2 x i1> %x) { ret <2 x i8> %hibit } -; FIXME: The replicated sign bits are all that's left. This could be ashr+zext. 
- -define i16 @smear_sign_and_widen(i4 %x) { +define i32 @smear_sign_and_widen(i8 %x) { ; CHECK-LABEL: @smear_sign_and_widen( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 %x, 7 +; CHECK-NEXT: [[HIBIT:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[HIBIT]] +; + %sext = sext i8 %x to i32 + %hibit = lshr i32 %sext, 24 + ret i32 %hibit +} + +define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) { +; CHECK-LABEL: @smear_sign_and_widen_should_not_change_type( ; CHECK-NEXT: [[SEXT:%.*]] = sext i4 %x to i16 ; CHECK-NEXT: [[HIBIT:%.*]] = lshr i16 [[SEXT]], 12 ; CHECK-NEXT: ret i16 [[HIBIT]] @@ -137,8 +146,8 @@ define i16 @smear_sign_and_widen(i4 %x) { define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) { ; CHECK-LABEL: @smear_sign_and_widen_splat( -; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i6> %x to <2 x i8> -; CHECK-NEXT: [[HIBIT:%.*]] = lshr <2 x i8> [[SEXT]], <i8 2, i8 2> +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> %x, <i6 2, i6 2> +; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[HIBIT]] ; %sext = sext <2 x i6> %x to <2 x i8> diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll index 496d847b5321e..47a4ca4b628bf 100644 --- a/test/Transforms/InstCombine/onehot_merge.ll +++ b/test/Transforms/InstCombine/onehot_merge.ll @@ -33,3 +33,79 @@ bb: ret i1 %or } +; Same as above but with operands commuted one of the ands, but not the other. +define i1 @foo1_and_commuted(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_and_commuted( +; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]] +; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[TMP]], [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K2]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: ret i1 [[TMP2]] +; + %k2 = mul i32 %k, %k ; to trick the complexity sorting + %tmp = shl i32 1, %c1 + %tmp4 = lshr i32 -2147483648, %c2 + %tmp1 = and i32 %k2, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp5 = and i32 %tmp4, %k2 + %tmp6 = icmp eq i32 %tmp5, 0 + %or = or i1 %tmp2, %tmp6 + ret i1 %or +} + +define i1 @or_consts(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @or_consts( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 12 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %tmp1 = and i32 4, %k + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp5 = and i32 8, %k + %tmp6 = icmp ne i32 %tmp5, 0 + %or = and i1 %tmp2, %tmp6 + ret i1 %or +} + +define i1 @foo1_or(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or( +; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %tmp = shl i32 1, %c1 + %tmp4 = lshr i32 -2147483648, %c2 + %tmp1 = and i32 %tmp, %k + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp5 = and i32 %tmp4, %k + %tmp6 = icmp ne i32 %tmp5, 0 + %or = and i1 %tmp2, %tmp6 + ret i1 %or +} + +; Same as above but with operands commuted one of the ors, but not the other. 
+define i1 @foo1_or_commuted(i32 %k, i32 %c1, i32 %c2) { +; CHECK-LABEL: @foo1_or_commuted( +; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]] +; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[K2]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP3]] +; + %k2 = mul i32 %k, %k ; to trick the complexity sorting + %tmp = shl i32 1, %c1 + %tmp4 = lshr i32 -2147483648, %c2 + %tmp1 = and i32 %k2, %tmp + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp5 = and i32 %tmp4, %k2 + %tmp6 = icmp ne i32 %tmp5, 0 + %or = and i1 %tmp2, %tmp6 + ret i1 %or +} diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll index f2bc290d79a45..485f9612376ad 100644 --- a/test/Transforms/InstCombine/or-xor.ll +++ b/test/Transforms/InstCombine/or-xor.ll @@ -114,6 +114,17 @@ define i32 @test10(i32 %A, i32 %B) { ret i32 %or } +define i32 @test10_commuted(i32 %A, i32 %B) { +; CHECK-LABEL: @test10_commuted( +; CHECK-NEXT: ret i32 -1 +; + %xor1 = xor i32 %B, %A + %not = xor i32 %A, -1 + %xor2 = xor i32 %not, %B + %or = or i32 %xor2, %xor1 + ret i32 %or +} + ; (x | y) & ((~x) ^ y) -> (x & y) define i32 @test11(i32 %x, i32 %y) { ; CHECK-LABEL: @test11( @@ -300,3 +311,36 @@ define i8 @or_xor_or(i8 %x) { ret i8 %or2 } +define i8 @test17(i8 %A, i8 %B) { +; CHECK-LABEL: @test17( +; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33 +; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]] +; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], [[XOR2]] +; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]] +; CHECK-NEXT: ret i8 [[RES]] +; + %xor1 = xor i8 %B, %A + %not = xor i8 %A, 33 + %xor2 = xor i8 %not, %B + %or = or i8 %xor1, %xor2 + %res = mul i8 %or, %xor2 ; to increase the use count for the xor + ret i8 %res +} + +define i8 @test18(i8 %A, i8 %B) { +; CHECK-LABEL: @test18( +; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33 +; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]] +; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR2]], [[XOR1]] +; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]] +; CHECK-NEXT: ret i8 [[RES]] +; + %xor1 = xor i8 %B, %A + %not = xor i8 %A, 33 + %xor2 = xor i8 %not, %B + %or = or i8 %xor2, %xor1 + %res = mul i8 %or, %xor2 ; to increase the use count for the xor + ret i8 %res +} diff --git a/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/test/Transforms/InstCombine/select-with-bitwise-ops.ll index 68b73af21a8d6..faeb4e046aca8 100644 --- a/test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ b/test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "n8:16:32:64" + define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2( ; CHECK-NEXT: [[AND:%.*]] = shl i32 %x, 1 @@ -295,3 +297,269 @@ define i32 @test67(i16 %x) { ret i32 %3 } +define i32 @test68(i32 %x, i32 %y) { +; CHECK-LABEL: @test68( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 128 + %cmp = icmp eq 
i32 %and, 0 + %or = or i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %or + ret i32 %select +} + +define i32 @test69(i32 %x, i32 %y) { +; CHECK-LABEL: @test69( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[SELECT]] +; + %and = and i32 %x, 128 + %cmp = icmp ne i32 %and, 0 + %or = or i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %or + ret i32 %select +} + +define i32 @shift_no_xor_multiuse_or(i32 %x, i32 %y) { +; CHECK-LABEL: @shift_no_xor_multiuse_or( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2 +; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 0 + %or = or i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %or + %res = mul i32 %select, %or ; to bump up use count of the Or + ret i32 %res +} + +define i32 @no_shift_no_xor_multiuse_or(i32 %x, i32 %y) { +; CHECK-LABEL: @no_shift_no_xor_multiuse_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[OR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %or = or i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %or + %res = mul i32 %select, %or ; to bump up use count of the Or + ret i32 %res +} + +define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) { +; CHECK-LABEL: @no_shift_xor_multiuse_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %or = or i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %or + %res = mul i32 %select, %or ; to bump up use count of the Or + ret i32 %res +} + +; TODO this increased the number of instructions +define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) { +; CHECK-LABEL: @shift_xor_multiuse_or( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048 +; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2048 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[OR]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %or = or i32 %y, 2048 + %select = select i1 %cmp, i32 %y, i32 %or + %res = mul i32 %select, %or ; to bump up use count of the Or + ret i32 %res +} + +define i32 @shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_no_xor_multiuse_cmp( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 1 + %cmp = 
icmp eq i32 %and, 0 + %or = or i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + +define i32 @no_shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %or = or i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + +define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_xor_multiuse_cmp( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %or = or i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + +; TODO this increased the number of instructions +define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_xor_multiuse_cmp( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %or = or i32 %y, 2048 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + ret i32 %res +} + +; TODO this increased the number of instructions +define i32 @shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_no_xor_multiuse_cmp_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 0 + %or = or i32 %y, 2 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %or ; to bump up the use count of the or + ret i32 %res2 +} + +define i32 
@no_shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 4096 + %cmp = icmp eq i32 %and, 0 + %or = or i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %or ; to bump up the use count of the or + ret i32 %res2 +} + +; TODO this increased the number of instructions +define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @no_shift_xor_multiuse_cmp_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %or = or i32 %y, 4096 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %or ; to bump up the use count of the or + ret i32 %res2 +} + +; TODO this increased the number of instructions +define i32 @shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @shift_xor_multiuse_cmp_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]] +; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]] +; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]] +; CHECK-NEXT: ret i32 [[RES2]] +; + %and = and i32 %x, 4096 + %cmp = icmp ne i32 0, %and + %or = or i32 %y, 2048 + %select = select i1 %cmp, i32 %y, i32 %or + %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp + %res = mul i32 %select, %select2 + %res2 = mul i32 %res, %or ; to bump up the use count of the or + ret i32 %res2 +} diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index ce8e2fcd38b9b..68bbf35d1e65a 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -1306,3 +1306,13 @@ define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { ret <2 x i8> %shr } +; Make sure known bits works correctly with non power of 2 bit widths. +define i7 @test65(i7 %a, i7 %b) { +; CHECK-LABEL: @test65( +; CHECK-NEXT: ret i7 0 +; + %shiftamt = and i7 %b, 6 ; this ensures the shift amount is even and less than the bit width. 
+ %x = lshr i7 42, %shiftamt ; 42 has a zero in every even numbered bit and a one in every odd bit. + %y = and i7 %x, 1 ; this extracts the lsb which should be 0 because we shifted an even number of bits and all even bits of the shift input are 0. + ret i7 %y +} diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll index 3afbf632f6e19..49e6b999fbce2 100644 --- a/test/Transforms/InstCombine/xor2.ll +++ b/test/Transforms/InstCombine/xor2.ll @@ -325,3 +325,36 @@ define i32 @test14(i32 %a, i32 %b, i32 %c) { ret i32 %xor } +define i8 @test15(i8 %A, i8 %B) { +; CHECK-LABEL: @test15( +; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33 +; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], [[XOR2]] +; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]] +; CHECK-NEXT: ret i8 [[RES]] +; + %xor1 = xor i8 %B, %A + %not = xor i8 %A, 33 + %xor2 = xor i8 %not, %B + %and = and i8 %xor1, %xor2 + %res = mul i8 %and, %xor2 ; to increase the use count for the xor + ret i8 %res +} + +define i8 @test16(i8 %A, i8 %B) { +; CHECK-LABEL: @test16( +; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33 +; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR2]], [[XOR1]] +; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]] +; CHECK-NEXT: ret i8 [[RES]] +; + %xor1 = xor i8 %B, %A + %not = xor i8 %A, 33 + %xor2 = xor i8 %not, %B + %and = and i8 %xor2, %xor1 + %res = mul i8 %and, %xor2 ; to increase the use count for the xor + ret i8 %res +} diff --git a/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll index ec93847178b58..d52378b864ff9 100644 --- a/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll +++ b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll @@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu" ;; memcpy.atomic formation (atomic load & store) define void @test1(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test1( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1) +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -30,7 +30,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation (atomic store, normal load) define void @test2(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test2( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1) +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -55,7 +55,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (atomic store, normal load w/ no align) define void @test2b(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test2b( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -80,7 +80,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align) define void @test2c(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test2c( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; 
CHECK: store ; CHECK: ret void bb.nph: @@ -105,7 +105,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load) define void @test2d(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test2d( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -131,7 +131,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation (normal store, atomic load) define void @test3(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1) +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -156,7 +156,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (normal store w/ no align, atomic load) define void @test3b(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test3b( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -181,7 +181,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align) define void @test3c(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test3c( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -206,7 +206,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load) define void @test3d(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test3d( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -232,7 +232,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (atomic load, ordered-atomic store) define void @test4(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test4( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -257,7 +257,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store) define void @test5(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test5( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: @@ -282,7 +282,8 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation (atomic load & store) -- element size 2 define void @test6(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test6( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 %Size, i32 2) +; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 1 +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -307,7 +308,8 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation (atomic load & store) -- element size 4 define void @test7(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test7( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 %Size, i32 4) +; 
CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 2 +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -332,7 +334,8 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation (atomic load & store) -- element size 8 define void @test8(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test8( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 %Size, i32 8) +; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 3 +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -357,7 +360,8 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (atomic load & store) -- element size 16 define void @test9(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test9( -; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 %Size, i32 16) +; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 4 +; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16) ; CHECK-NOT: store ; CHECK: ret void bb.nph: @@ -382,7 +386,7 @@ for.end: ; preds = %for.body, %entry ;; memcpy.atomic formation rejection (atomic load & store) -- element size 32 define void @test10(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test10( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: diff --git a/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll index b2528f1c24577..341a7a0baebf0 100644 --- a/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll +++ b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;; Will not create call due to a max element size of 0 define void @test1(i64 %Size) nounwind ssp { ; CHECK-LABEL: @test1( -; CHECK-NOT: call void @llvm.memcpy.element.atomic +; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic ; CHECK: store ; CHECK: ret void bb.nph: diff --git a/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml new file mode 100644 index 0000000000000..17b634acd0e1a --- /dev/null +++ b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml @@ -0,0 +1,19 @@ +--- +TypeIdMap: + typeid1: + TTRes: + Kind: AllOnes + SizeM1BitWidth: 7 + typeid2: + TTRes: + Kind: Single + SizeM1BitWidth: 0 +WithGlobalValueDeadStripping: false +CfiFunctionDefs: + - local_a + - local_b + - does_not_exist +CfiFunctionDecls: + - external + - external_weak +... 
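+# As exercised by import-icall.ll below: local_a is turned into a hidden
+# local_a.cfi definition, the internal local_b is left alone (it is not the
+# external symbol named here), calls to external/external_weak are redirected
+# to hidden *.cfi_jt declarations, and does_not_exist has no definition in
+# the module and must be tolerated.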
diff --git a/test/Transforms/LowerTypeTests/export-icall.ll b/test/Transforms/LowerTypeTests/export-icall.ll new file mode 100644 index 0000000000000..ad36048993067 --- /dev/null +++ b/test/Transforms/LowerTypeTests/export-icall.ll @@ -0,0 +1,70 @@ +; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s +; RUN: FileCheck --check-prefix=SUMMARY %s < %t + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @h(i8 %x) !type !2 { + ret void +} + +declare !type !8 void @f(i32 %x) + +!cfi.functions = !{!0, !1, !3, !4, !5, !6} + +; declaration of @h with a different type is ignored +!0 = !{!"h", i8 1, !7} + +; extern_weak declaration of @h with a different type is ignored as well +!1 = !{!"h", i8 2, !8} +!2 = !{i64 0, !"typeid1"} + +; definition of @f replaces types on the IR declaration above +!3 = !{!"f", i8 0, !2} +!4 = !{!"external", i8 1, !2} +!5 = !{!"external_weak", i8 2, !2} +!6 = !{!"g", i8 0, !7} +!7 = !{i64 0, !"typeid2"} +!8 = !{i64 0, !"typeid3"} + + +; CHECK-DAG: @__typeid_typeid1_global_addr = hidden alias i8, bitcast (void ()* [[JT1:.*]] to i8*) +; CHECK-DAG: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 3 to i8*) +; CHECK-DAG: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 3 to i8*) + +; CHECK-DAG: @h = alias void (i8), bitcast (void ()* [[JT1]] to void (i8)*) +; CHECK-DAG: @f = alias void (i32), {{.*}}getelementptr {{.*}}void ()* [[JT1]] +; CHECK-DAG: @external.cfi_jt = hidden alias void (), {{.*}}getelementptr {{.*}}void ()* [[JT1]] +; CHECK-DAG: @external_weak.cfi_jt = hidden alias void (), {{.*}}getelementptr {{.*}}void ()* [[JT1]] + +; CHECK-DAG: @__typeid_typeid2_global_addr = hidden alias i8, bitcast (void ()* [[JT2:.*]] to i8*) + +; CHECK-DAG: @g = alias void (), void ()* [[JT2]] + +; CHECK-DAG: define internal void @h.cfi(i8 {{.*}}) !type !{{.*}} +; CHECK-DAG: declare !type !{{.*}} void @external() +; CHECK-DAG: declare !type !{{.*}} void @external_weak() +; CHECK-DAG: declare !type !{{.*}} void @f.cfi(i32) +; CHECK-DAG: declare !type !{{.*}} void @g.cfi() + + +; SUMMARY: TypeIdMap: +; SUMMARY-NEXT: typeid1: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: AllOnes +; SUMMARY-NEXT: SizeM1BitWidth: 7 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: typeid2: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Single +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: WPDRes: + +; SUMMARY: CfiFunctionDefs: +; SUMMARY-NEXT: - f +; SUMMARY-NEXT: - g +; SUMMARY-NEXT: - h +; SUMMARY-NEXT: CfiFunctionDecls: +; SUMMARY-NEXT: - external +; SUMMARY-NEXT: - external_weak +; SUMMARY-NEXT: ... 
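For context on how an importing module is expected to consume the symbols this export test creates: Kind: Single records that the type id has exactly one address point, so a type test against typeid2 can presumably lower to a plain pointer-equality compare with the exported address, while the AllOnes typeid1 additionally needs the _align/_size_m1 symbols for a range check. A rough sketch of the Single case only, with the function name invented for illustration:

@__typeid_typeid2_global_addr = external hidden global i8

define i1 @sketch_typeid2_test(i8* %p) {
  ; the set has a single member, so membership is address equality
  %ok = icmp eq i8* %p, @__typeid_typeid2_global_addr
  ret i1 %ok
}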
diff --git a/test/Transforms/LowerTypeTests/import-icall.ll b/test/Transforms/LowerTypeTests/import-icall.ll new file mode 100644 index 0000000000000..ddeb7fb5c9a2b --- /dev/null +++ b/test/Transforms/LowerTypeTests/import-icall.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import-icall.yaml < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i8 @local_a() { + call void @external() + call void @external_weak() + ret i8 1 +} + +define internal i8 @local_b() { + %x = call i8 @local_a() + ret i8 %x +} + +define i8 @use_b() { + %x = call i8 @local_b() + ret i8 %x +} + + +declare void @external() +declare extern_weak void @external_weak() + +; CHECK: define hidden i8 @local_a.cfi() { +; CHECK-NEXT: call void @external.cfi_jt() +; CHECK-NEXT: call void select (i1 icmp ne (void ()* @external_weak, void ()* null), void ()* @external_weak.cfi_jt, void ()* null)() +; CHECK-NEXT: ret i8 1 +; CHECK-NEXT: } + +; internal @local_b is not the same function as "local_b" in the summary. +; CHECK: define internal i8 @local_b() { +; CHECK-NEXT: call i8 @local_a() + +; CHECK: declare void @external() +; CHECK: declare extern_weak void @external_weak() +; CHECK: declare i8 @local_a() +; CHECK: declare hidden void @external.cfi_jt() +; CHECK: declare hidden void @external_weak.cfi_jt() diff --git a/test/Transforms/PGOProfile/memop_size_opt.ll b/test/Transforms/PGOProfile/memop_size_opt.ll index 19a2b7ed293b2..e11f235a48e76 100644 --- a/test/Transforms/PGOProfile/memop_size_opt.ll +++ b/test/Transforms/PGOProfile/memop_size_opt.ll @@ -38,7 +38,7 @@ for.body3: ; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 1, i1 false) ; MEMOP_OPT: br label %[[MERGE_LABEL:.*]] ; MEMOP_OPT: [[DEFAULT_LABEL]]: -; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false){{[[:space:]]}} +; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false), !prof [[NEWVP:![0-9]+]] ; MEMOP_OPT: br label %[[MERGE_LABEL]] ; MEMOP_OPT: [[MERGE_LABEL]]: ; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL2:.*]] [ @@ -48,11 +48,16 @@ for.body3: ; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src2, i64 1, i32 1, i1 false) ; MEMOP_OPT: br label %[[MERGE_LABEL2:.*]] ; MEMOP_OPT: [[DEFAULT_LABEL2]]: -; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src2, i64 %conv, i32 1, i1 false){{[[:space:]]}} +; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src2, i64 %conv, i32 1, i1 false), !prof [[NEWVP]] ; MEMOP_OPT: br label %[[MERGE_LABEL2]] ; MEMOP_OPT: [[MERGE_LABEL2]]: ; MEMOP_OPT: br label %for.inc ; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99} +; Should be 457 total left (original total count 556, minus 99 from the specialized +; value 1, which is removed from the VP array). Also, we only end up with 5 total +; values, since the default max number of promotions is 5 and therefore +; the rest of the values are ignored when extracting the VP metadata.
+; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66} for.inc: %inc = add nsw i32 %j.0, 1 diff --git a/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll b/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll new file mode 100644 index 0000000000000..105afa9def5c1 --- /dev/null +++ b/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll @@ -0,0 +1,92 @@ +; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s + +; This test checks that metadata that's invalid after RS4GC is dropped. +; We can miscompile if optimizations scheduled after RS4GC use the +; metadata that's in fact invalid: the collector may relocate objects at a +; statepoint, so facts established before it (invariant.load, noalias scopes, +; dereferenceable) need not hold afterwards. + +declare void @bar() + +declare void @baz(i32) +; Confirm that the loadedval instruction does not contain invariant.load metadata +; but does contain the range metadata. +; Since loadedval is no longer marked invariant, LICM is prevented from +; incorrectly sinking %loadedval and creating an unrelocated use of %baseaddr. +define void @test_invariant_load() gc "statepoint-example" { +; CHECK-LABEL: @test_invariant_load +; CHECK: %loadedval = load i32, i32 addrspace(1)* %baseaddr, align 8, !range !0 +bb: + br label %outerloopHdr + +outerloopHdr: ; preds = %bb6, %bb + %baseaddr = phi i32 addrspace(1)* [ undef, %bb ], [ %tmp4, %bb6 ] +; LICM may sink this load to the exit block after RS4GC because it's tagged invariant. + %loadedval = load i32, i32 addrspace(1)* %baseaddr, align 8, !range !0, !invariant.load !1 + br label %innerloopHdr + +innerloopHdr: ; preds = %innerlooplatch, %outerloopHdr + %tmp4 = phi i32 addrspace(1)* [ %baseaddr, %outerloopHdr ], [ %gep, %innerlooplatch ] + br label %innermostloophdr + +innermostloophdr: ; preds = %bb6, %innerloopHdr + br i1 undef, label %exitblock, label %bb6 + +bb6: ; preds = %innermostloophdr + switch i32 undef, label %innermostloophdr [ + i32 0, label %outerloopHdr + i32 1, label %innerlooplatch + ] + +innerlooplatch: ; preds = %bb6 + call void @bar() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %tmp4, i64 8 + br label %innerloopHdr + +exitblock: ; preds = %innermostloophdr + %tmp13 = add i32 42, %loadedval + call void @baz(i32 %tmp13) + unreachable +} + +; Drop the noalias metadata. +define void @test_noalias(i32 %x, i32 addrspace(1)* %p, i32 addrspace(1)* %q) gc "statepoint-example" { +; CHECK-LABEL: test_noalias +; CHECK: %y = load i32, i32 addrspace(1)* %q, align 16 +; CHECK: gc.statepoint +; CHECK: %p.relocated +; CHECK-NEXT: %p.relocated.casted = bitcast i8 addrspace(1)* %p.relocated to i32 addrspace(1)* +; CHECK-NEXT: store i32 %x, i32 addrspace(1)* %p.relocated.casted, align 16 entry: + %y = load i32, i32 addrspace(1)* %q, align 16, !noalias !3 + call void @baz(i32 %x) + store i32 %x, i32 addrspace(1)* %p, align 16, !noalias !4 + ret void +} + +; Drop the dereferenceable metadata. +define void @test_dereferenceable(i32 addrspace(1)* addrspace(1)* %p, i32 %x, i32 addrspace(1)* %q) gc "statepoint-example" { +; CHECK-LABEL: test_dereferenceable +; CHECK: %v1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p +; CHECK-NEXT: %v2 = load i32, i32 addrspace(1)* %v1 +; CHECK: gc.statepoint + %v1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p, !dereferenceable !5 + %v2 = load i32, i32 addrspace(1)* %v1 + call void @baz(i32 %x) + store i32 %v2, i32 addrspace(1)* %q, align 16 + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64, i32, void (i32)*, i32, i32, ...)
+ +; Function Attrs: nounwind readonly +declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #0 + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) + +attributes #0 = { nounwind readonly } + +!0 = !{i32 0, i32 2147483647} +!1 = !{} +!2 = !{i32 10, i32 1} +!3 = !{!3} +!4 = !{!4} +!5 = !{i64 8} diff --git a/test/Transforms/SLPVectorizer/X86/arith-add.ll b/test/Transforms/SLPVectorizer/X86/arith-add.ll index 0266758b27d23..22b2c7422933b 100644 --- a/test/Transforms/SLPVectorizer/X86/arith-add.ll +++ b/test/Transforms/SLPVectorizer/X86/arith-add.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F @@ -38,6 +39,25 @@ define void @add_v8i64() { ; SSE-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8 ; SSE-NEXT: ret void ; +; SLM-LABEL: @add_v8i64( +; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = add <2 x i64> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = add <2 x i64> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8 +; SLM-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8 +; SLM-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8 +; SLM-NEXT: store <2 x i64> [[TMP12]], 
<2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8 +; SLM-NEXT: ret void +; ; AVX-LABEL: @add_v8i64( ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8 @@ -111,6 +131,25 @@ define void @add_v16i32() { ; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SSE-NEXT: ret void ; +; SLM-LABEL: @add_v16i32( +; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: ret void +; ; AVX-LABEL: @add_v16i32( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 @@ -216,6 +255,25 @@ define void @add_v32i16() { ; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 ; SSE-NEXT: ret void ; +; SLM-LABEL: @add_v32i16( +; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP3:%.*]] = load <8 x 
i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = add <8 x i16> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = add <8 x i16> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = add <8 x i16> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: ret void +; ; AVX-LABEL: @add_v32i16( ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2 diff --git a/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/test/Transforms/SLPVectorizer/X86/arith-fp.ll index e00ed849ee4b5..119cf594c905d 100644 --- a/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX @@ -69,13 +70,32 @@ define <2 x double> @buildvector_mul_2f64(<2 x double> %a, <2 x double> %b) { } define <2 x double> @buildvector_div_2f64(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: @buildvector_div_2f64( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; 
CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: ret <2 x double> [[R1]] +; SSE-LABEL: @buildvector_div_2f64( +; SSE-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]] +; SSE-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 +; SSE-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0 +; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; SSE-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 +; SSE-NEXT: ret <2 x double> [[R1]] +; +; SLM-LABEL: @buildvector_div_2f64( +; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1 +; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1 +; SLM-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]] +; SLM-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]] +; SLM-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 +; SLM-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1 +; SLM-NEXT: ret <2 x double> [[R1]] +; +; AVX-LABEL: @buildvector_div_2f64( +; AVX-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]] +; AVX-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 +; AVX-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0 +; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; AVX-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 +; AVX-NEXT: ret <2 x double> [[R1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 @@ -317,17 +337,48 @@ define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) { } define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @buildvector_div_4f64( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 -; CHECK-NEXT: ret <4 x double> [[R3]] +; SSE-LABEL: @buildvector_div_4f64( +; SSE-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]] +; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 +; SSE-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 +; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 +; SSE-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 +; SSE-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 +; SSE-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 +; SSE-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 +; SSE-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 +; SSE-NEXT: ret <4 x double> [[R3]] +; +; SLM-LABEL: @buildvector_div_4f64( +; SLM-NEXT: [[A0:%.*]] = extractelement <4 
x double> [[A:%.*]], i32 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 +; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3 +; SLM-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]] +; SLM-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]] +; SLM-NEXT: [[C2:%.*]] = fdiv double [[A2]], [[B2]] +; SLM-NEXT: [[C3:%.*]] = fdiv double [[A3]], [[B3]] +; SLM-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; SLM-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; SLM-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; SLM-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 +; SLM-NEXT: ret <4 x double> [[R3]] +; +; AVX-LABEL: @buildvector_div_4f64( +; AVX-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]] +; AVX-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0 +; AVX-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0 +; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 +; AVX-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2 +; AVX-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 +; AVX-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 +; AVX-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 @@ -745,25 +796,80 @@ define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) { } define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { -; CHECK-LABEL: @buildvector_div_8f64( -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 -; CHECK-NEXT: ret <8 x double> [[R7]] +; SSE-LABEL: 
@buildvector_div_8f64( +; SSE-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]] +; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 +; SSE-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 +; SSE-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 +; SSE-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 +; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 +; SSE-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 +; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 +; SSE-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 +; SSE-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 +; SSE-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 +; SSE-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 +; SSE-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 +; SSE-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 +; SSE-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 +; SSE-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 +; SSE-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 +; SSE-NEXT: ret <8 x double> [[R7]] +; +; SLM-LABEL: @buildvector_div_8f64( +; SLM-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; SLM-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; SLM-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; SLM-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; SLM-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; SLM-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; SLM-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; SLM-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]] +; SLM-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]] +; SLM-NEXT: [[C2:%.*]] = fdiv double [[A2]], [[B2]] +; SLM-NEXT: [[C3:%.*]] = fdiv double [[A3]], [[B3]] +; SLM-NEXT: [[C4:%.*]] = fdiv double [[A4]], [[B4]] +; SLM-NEXT: [[C5:%.*]] = fdiv double [[A5]], [[B5]] +; SLM-NEXT: [[C6:%.*]] = fdiv double [[A6]], [[B6]] +; SLM-NEXT: [[C7:%.*]] = fdiv double [[A7]], [[B7]] +; SLM-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; SLM-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; SLM-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; SLM-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; SLM-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; SLM-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; SLM-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; SLM-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], 
double [[C7]], i32 7 +; SLM-NEXT: ret <8 x double> [[R7]] +; +; AVX-LABEL: @buildvector_div_8f64( +; AVX-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]] +; AVX-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0 +; AVX-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 +; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1 +; AVX-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2 +; AVX-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3 +; AVX-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4 +; AVX-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5 +; AVX-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6 +; AVX-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 +; AVX-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 +; AVX-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 %a1 = extractelement <8 x double> %a, i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/test/Transforms/SLPVectorizer/X86/arith-mul.ll index 95875d7f01fd1..4763a9a2bf12b 100644 --- a/test/Transforms/SLPVectorizer/X86/arith-mul.ll +++ b/test/Transforms/SLPVectorizer/X86/arith-mul.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F @@ -54,6 +55,41 @@ define void @mul_v8i64() { ; SSE-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8 ; SSE-NEXT: ret void ; +; SLM-LABEL: @mul_v8i64( +; SLM-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8 +; SLM-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8 +; SLM-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8 +; SLM-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8 +; SLM-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8 +; SLM-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8 +; SLM-NEXT: [[A6:%.*]] = load 
i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8 +; SLM-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8 +; SLM-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8 +; SLM-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8 +; SLM-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8 +; SLM-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8 +; SLM-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8 +; SLM-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8 +; SLM-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8 +; SLM-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8 +; SLM-NEXT: [[R0:%.*]] = mul i64 [[A0]], [[B0]] +; SLM-NEXT: [[R1:%.*]] = mul i64 [[A1]], [[B1]] +; SLM-NEXT: [[R2:%.*]] = mul i64 [[A2]], [[B2]] +; SLM-NEXT: [[R3:%.*]] = mul i64 [[A3]], [[B3]] +; SLM-NEXT: [[R4:%.*]] = mul i64 [[A4]], [[B4]] +; SLM-NEXT: [[R5:%.*]] = mul i64 [[A5]], [[B5]] +; SLM-NEXT: [[R6:%.*]] = mul i64 [[A6]], [[B6]] +; SLM-NEXT: [[R7:%.*]] = mul i64 [[A7]], [[B7]] +; SLM-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8 +; SLM-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8 +; SLM-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8 +; SLM-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8 +; SLM-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8 +; SLM-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8 +; SLM-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8 +; SLM-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8 +; SLM-NEXT: ret void +; ; AVX1-LABEL: @mul_v8i64( ; AVX1-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8 ; AVX1-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8 @@ -162,6 +198,25 @@ define void @mul_v16i32() { ; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SSE-NEXT: ret void ; +; SLM-LABEL: @mul_v16i32( +; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: 
[[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: ret void +; ; AVX-LABEL: @mul_v16i32( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 @@ -267,6 +322,25 @@ define void @mul_v32i16() { ; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 ; SSE-NEXT: ret void ; +; SLM-LABEL: @mul_v32i16( +; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP9:%.*]] = mul <8 x i16> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = mul <8 x i16> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = mul <8 x i16> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast 
(i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: ret void +; ; AVX-LABEL: @mul_v32i16( ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2 diff --git a/test/Transforms/SLPVectorizer/X86/arith-sub.ll b/test/Transforms/SLPVectorizer/X86/arith-sub.ll index 85838369e2266..2bbaaca02d88b 100644 --- a/test/Transforms/SLPVectorizer/X86/arith-sub.ll +++ b/test/Transforms/SLPVectorizer/X86/arith-sub.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F @@ -38,6 +39,25 @@ define void @sub_v8i64() { ; SSE-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8 ; SSE-NEXT: ret void ; +; SLM-LABEL: @sub_v8i64( +; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8 +; SLM-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = sub <2 x i64> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = sub <2 x i64> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to 
<2 x i64>*), align 8 +; SLM-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8 +; SLM-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8 +; SLM-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8 +; SLM-NEXT: ret void +; ; AVX-LABEL: @sub_v8i64( ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8 @@ -111,6 +131,25 @@ define void @sub_v16i32() { ; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SSE-NEXT: ret void ; +; SLM-LABEL: @sub_v16i32( +; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = sub <4 x i32> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = sub <4 x i32> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: ret void +; ; AVX-LABEL: @sub_v16i32( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 @@ -216,6 +255,25 @@ define void @sub_v32i16() { ; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x 
i16>*), align 2 ; SSE-NEXT: ret void ; +; SLM-LABEL: @sub_v32i16( +; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP9:%.*]] = sub <8 x i16> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = sub <8 x i16> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = sub <8 x i16> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = sub <8 x i16> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: ret void +; ; AVX-LABEL: @sub_v32i16( ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2 diff --git a/test/Transforms/SafeStack/X86/debug-loc.ll b/test/Transforms/SafeStack/X86/debug-loc.ll index 88cda693b2932..d6b217142bfef 100644 --- a/test/Transforms/SafeStack/X86/debug-loc.ll +++ b/test/Transforms/SafeStack/X86/debug-loc.ll @@ -37,10 +37,10 @@ entry: ; CHECK-DAG: ![[VAR_ARG]] = !DILocalVariable(name: "zzz" ; 100 aligned up to 8 -; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_minus, 104) +; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_constu, 104, DW_OP_minus ; CHECK-DAG: ![[VAR_LOCAL]] = !DILocalVariable(name: "xxx" -; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_minus, 208) +; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_constu, 208, DW_OP_minus ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 diff --git a/test/Transforms/SafeStack/X86/debug-loc2.ll b/test/Transforms/SafeStack/X86/debug-loc2.ll index 8059a722fd45c..731516c3c65ed 100644 --- a/test/Transforms/SafeStack/X86/debug-loc2.ll +++ b/test/Transforms/SafeStack/X86/debug-loc2.ll @@ -84,8 +84,8 @@ attributes #4 = { nounwind } !13 = !DILocation(line: 5, column: 3, scope: !6) !14 = !DILocation(line: 6, column: 3, 
scope: !6) -; CHECK-DAG: ![[X1_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_minus, 4) -; CHECK-DAG: ![[X2_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_minus, 8) +; CHECK-DAG: ![[X1_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_constu, 4, DW_OP_minus) +; CHECK-DAG: ![[X2_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_constu, 8, DW_OP_minus) !15 = !DIExpression(DW_OP_deref) !16 = !DILocation(line: 5, column: 7, scope: !6) !17 = !DILocation(line: 8, column: 3, scope: !6) @@ -95,4 +95,4 @@ attributes #4 = { nounwind } !21 = !DILocation(line: 10, column: 1, scope: !22) !22 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 1) !23 = !DIExpression() -!24 = !DIExpression(DW_OP_minus, 42) +!24 = !DIExpression(DW_OP_constu, 42, DW_OP_minus) diff --git a/test/Transforms/Util/PredicateInfo/pr33456.ll b/test/Transforms/Util/PredicateInfo/pr33456.ll new file mode 100644 index 0000000000000..f1cc83a071b96 --- /dev/null +++ b/test/Transforms/Util/PredicateInfo/pr33456.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s +; Don't insert predicate info for conditions with a single target. +@a = global i32 1, align 4 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i32 0, align 4 + +define i32 @main() { +; CHECK-LABEL: @main( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @d, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP13:%.*]] +; CHECK: [[TMP4:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @c, align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP9:%.*]] +; CHECK: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9]], label [[TMP9]] +; CHECK: [[DOT0:%.*]] = phi i32 [ [[TMP4]], [[TMP7]] ], [ [[TMP4]], [[TMP7]] ], [ [[DOT1:%.*]], [[TMP13]] ], [ [[TMP4]], [[TMP3]] ] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = sdiv i32 [[TMP10]], [[DOT0]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[TMP12]], label [[TMP13]], label [[TMP13]] +; CHECK: [[DOT1]] = phi i32 [ [[DOT0]], [[TMP9]] ], [ [[DOT0]], [[TMP9]] ], [ undef, [[TMP0:%.*]] ] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* @e, align 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP9]] +; CHECK: ret i32 0 +; + %1 = load i32, i32* @d, align 4 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %3, label %13 + +; <label>:3: ; preds = %0 + %4 = load i32, i32* @a, align 4 + %5 = load i32, i32* @c, align 4 + %6 = icmp slt i32 %5, 1 + br i1 %6, label %7, label %9 + +; <label>:7: ; preds = %3 + %8 = icmp eq i32 %4, 0 + br i1 %8, label %9, label %9 + +; <label>:9: ; preds = %13, %7, %7, %3 + %.0 = phi i32 [ %4, %7 ], [ %4, %7 ], [ %.1, %13 ], [ %4, %3 ] + %10 = load i32, i32* @b, align 4 + %11 = sdiv i32 %10, %.0 + %12 = icmp eq i32 %11, 0 + br i1 %12, label %13, label %13 + +; <label>:13: ; preds = %9, %9, %0 + %.1 = phi i32 [ %.0, %9 ], [ %.0, %9 ], [ undef, %0 ] + %14 = load i32, i32* @e, align 4 + %15 = icmp eq i32 %14, 0 + br i1 %15, label %16, label %9 + +; <label>:16: ; preds = %13 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +; Function Attrs: argmemonly nounwind +declare 
void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + diff --git a/test/Transforms/Util/PredicateInfo/pr33457.ll b/test/Transforms/Util/PredicateInfo/pr33457.ll new file mode 100644 index 0000000000000..b975ade9321db --- /dev/null +++ b/test/Transforms/Util/PredicateInfo/pr33457.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s +; Don't insert predicate info for conditions with a single target. +@a = global i32 6, align 4 +@c = global i32 -1, align 4 +@e = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@d = common global i32 0, align 4 +@b = common global [6 x i32] zeroinitializer, align 16 + +; Function Attrs: nounwind ssp uwtable +define i32 @main() { +; CHECK-LABEL: @main( +; CHECK-NEXT: store i32 6, i32* @e, align 4 +; CHECK-NEXT: br label [[TMP1:%.*]] +; CHECK: [[TMP2:%.*]] = load i32, i32* @d, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @b, i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label %thread-pre-split, label [[TMP9:%.*]] +; CHECK: [[TMP10:%.*]] = load i32, i32* @e, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP12]] +; CHECK: thread-pre-split: +; CHECK-NEXT: [[DOTPR:%.*]] = load i32, i32* @e, align 4 +; CHECK-NEXT: br label [[TMP12]] +; CHECK: [[TMP13:%.*]] = phi i32 [ [[DOTPR]], %thread-pre-split ], [ [[TMP10]], [[TMP9]] ], [ [[TMP10]], [[TMP9]] ] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP15]] +; CHECK: br i1 [[TMP14]], label [[TMP16:%.*]], label [[TMP17:%.*]] +; CHECK: br label [[TMP17]] +; CHECK: [[DOT0:%.*]] = phi i32 [ 1, [[TMP16]] ], [ -1, [[TMP15]] ] +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[DOT0]], 8693 +; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* @c, align 4 +; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = xor i32 [[TMP20]], -1 +; CHECK-NEXT: store i32 [[TMP21]], i32* @d, align 4 +; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[TMP20]], -2 +; CHECK-NEXT: br i1 [[TMP22]], label [[TMP1]], label [[TMP23:%.*]] +; CHECK: ret i32 0 +; + store i32 6, i32* @e, align 4 + br label %1 + +; <label>:1: ; preds = %17, %0 + %2 = load i32, i32* @d, align 4 + %3 = sext i32 %2 to i64 + %4 = getelementptr inbounds [6 x i32], [6 x i32]* @b, i64 0, i64 %3 + %5 = load i32, i32* %4, align 4 + %6 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %5) #2 + %7 = load i32, i32* @a, align 4 + %8 = icmp eq i32 %7, 0 + br i1 %8, label %thread-pre-split, label %9 + +; <label>:9: ; preds = %1 + %10 = load i32, i32* @e, align 4 + %11 = icmp eq i32 %10, 0 + br i1 %11, label %12, label %12 + +thread-pre-split: ; preds = %1 + %.pr = load i32, i32* @e, align 4 + br label %12 + +; <label>:12: ; preds = %thread-pre-split, %9, %9 + %13 = phi i32 [ %.pr, %thread-pre-split ], [ %10, %9 ], [ %10, %9 ] + %14 = icmp ne i32 %13, 0 + br i1 %14, label %15, label %15 + +; <label>:15: ; preds = %12, %12 + br i1 %14, label %16, label %17 + +; <label>:16: ; preds = %15 + br label %17 + +; <label>:17: ; preds = %16, %15 + %.0 = phi i32 [ 1, %16 ], [ -1, %15 ] + %18 = and i32 %.0, 8693 + %19 = load i32, i32* @c, align 4 + %20 = xor i32 %18, %19 + %21 = xor i32 %20, -1 + store i32 %21, i32* @d, align 4 + %22 = icmp slt i32 %20, -2 + br i1 %22, label %1, label %23 + +; <label>:23: ; preds = %17 + ret i32 0 +} + +declare i32 @printf(i8*, ...) +
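A closing observation on the two PredicateInfo tests above: both hinge on the rule stated in their header comment, namely that a br i1 whose true and false labels name the same block predicates nothing, so no predicateinfo copies should be inserted for its condition there, while the same condition on a branch with two distinct targets does get its uses renamed. A tiny illustrative function (name invented, not from the tests) showing the two situations side by side:

define i32 @sketch(i32 %v) {
entry:
  %c = icmp eq i32 %v, 0
  ; single target: effectively unconditional, no predicate info for %c
  br i1 %c, label %merge, label %merge

merge:
  ; two distinct targets: uses of %c dominated by each edge are renamed
  br i1 %c, label %t, label %f

t:
  ret i32 0

f:
  ret i32 1
}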