Diffstat (limited to 'test/Transforms')
 test/Transforms/CodeExtractor/live_shrink.ll | 67
 test/Transforms/CodeExtractor/live_shrink_gep.ll | 66
 test/Transforms/CodeExtractor/live_shrink_hoist.ll | 66
 test/Transforms/CodeExtractor/live_shrink_multiple.ll | 66
 test/Transforms/CodeExtractor/live_shrink_unsafe.ll | 94
 test/Transforms/CrossDSOCFI/cfi_functions.ll | 23
 test/Transforms/EarlyCSE/pr33406.ll | 26
 test/Transforms/GVN/pr32314.ll | 53
 test/Transforms/GlobalMerge/debug-info.ll | 2
 test/Transforms/Inline/always-inline.ll | 11
 test/Transforms/InstCombine/debuginfo-dce.ll | 12
 test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll | 30
 test/Transforms/InstCombine/ffs-1.ll | 156
 test/Transforms/InstCombine/lshr.ll | 19
 test/Transforms/InstCombine/onehot_merge.ll | 76
 test/Transforms/InstCombine/or-xor.ll | 44
 test/Transforms/InstCombine/select-with-bitwise-ops.ll | 268
 test/Transforms/InstCombine/shift.ll | 10
 test/Transforms/InstCombine/xor2.ll | 33
 test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll | 36
 test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll | 2
 test/Transforms/LowerTypeTests/Inputs/import-icall.yaml | 19
 test/Transforms/LowerTypeTests/export-icall.ll | 70
 test/Transforms/LowerTypeTests/import-icall.ll | 40
 test/Transforms/PGOProfile/memop_size_opt.ll | 9
 test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll | 92
 test/Transforms/SLPVectorizer/X86/arith-add.ll | 58
 test/Transforms/SLPVectorizer/X86/arith-fp.ll | 180
 test/Transforms/SLPVectorizer/X86/arith-mul.ll | 74
 test/Transforms/SLPVectorizer/X86/arith-sub.ll | 58
 test/Transforms/SafeStack/X86/debug-loc.ll | 4
 test/Transforms/SafeStack/X86/debug-loc2.ll | 6
 test/Transforms/Util/PredicateInfo/pr33456.ll | 68
 test/Transforms/Util/PredicateInfo/pr33457.ll | 93
 34 files changed, 1794 insertions(+), 137 deletions(-)
diff --git a/test/Transforms/CodeExtractor/live_shrink.ll b/test/Transforms/CodeExtractor/live_shrink.ll
new file mode 100644
index 0000000000000..c25ed2b622cdc
--- /dev/null
+++ b/test/Transforms/CodeExtractor/live_shrink.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+%class.A = type { i32 }
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr {
+bb:
+ %tmp = alloca %class.A, align 4
+ %tmp1 = bitcast %class.A* %tmp to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+ %tmp2 = load i32, i32* @cond, align 4, !tbaa !2
+ %tmp3 = icmp eq i32 %tmp2, 0
+ br i1 %tmp3, label %bb4, label %bb5
+
+bb4: ; preds = %bb
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr {
+; CHECK-LABEL: @_Z3goov()
+bb:
+; CHECK: bb:
+; CHECK-NOT: alloca
+; CHECK-NOT: bitcast
+; CHECK-NOT: llvm.lifetime
+; CHECK: br i1
+; CHECK: codeRepl.i:
+; CHECK: call void @_Z3foov.1_
+
+ tail call void @_Z3foov()
+ ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1_
+; CHECK: newFuncRoot:
+; CHECK-NEXT: %tmp = alloca %class.A
+; CHECK-NEXT: %tmp1 = bitcast %class.A* %tmp to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+; CHECK-NEXT: br label %bb5.exitStub
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
diff --git a/test/Transforms/CodeExtractor/live_shrink_gep.ll b/test/Transforms/CodeExtractor/live_shrink_gep.ll
new file mode 100644
index 0000000000000..ac6aa4fbda43b
--- /dev/null
+++ b/test/Transforms/CodeExtractor/live_shrink_gep.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+%class.A = type { i8 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr {
+bb:
+ %tmp = alloca %class.A, align 1
+ %tmp1 = getelementptr inbounds %class.A, %class.A* %tmp, i64 0, i32 0
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %tmp1)
+ %tmp2 = load i32, i32* @cond, align 4, !tbaa !2
+ %tmp3 = icmp eq i32 %tmp2, 0
+ br i1 %tmp3, label %bb4, label %bb5
+
+bb4: ; preds = %bb
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %tmp1)
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr {
+; CHECK-LABEL: @_Z3goov()
+bb:
+; CHECK: bb:
+; CHECK-NOT: alloca
+; CHECK-NOT: getelementptr
+; CHECK-NOT: llvm.lifetime
+; CHECK: br i1
+; CHECK: codeRepl.i:
+; CHECK: call void @_Z3foov.1_
+ tail call void @_Z3foov()
+ ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1_
+; CHECK: newFuncRoot:
+; CHECK-NEXT: %tmp = alloca %class.A
+; CHECK-NEXT: %tmp1 = getelementptr
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
+; CHECK: call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT: br label %bb5.exitStub
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
diff --git a/test/Transforms/CodeExtractor/live_shrink_hoist.ll b/test/Transforms/CodeExtractor/live_shrink_hoist.ll
new file mode 100644
index 0000000000000..d1b310f017694
--- /dev/null
+++ b/test/Transforms/CodeExtractor/live_shrink_hoist.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+%class.A = type { i32 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr {
+bb:
+ %tmp = alloca %class.A, align 4
+ %tmp1 = bitcast %class.A* %tmp to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+ %tmp2 = load i32, i32* @cond, align 4, !tbaa !2
+ %tmp3 = icmp eq i32 %tmp2, 0
+ br i1 %tmp3, label %bb4, label %bb9
+
+bb4: ; preds = %bb
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ %tmp5 = getelementptr inbounds %class.A, %class.A* %tmp, i64 0, i32 0
+ %tmp6 = load i32, i32* %tmp5, align 4, !tbaa !6
+ %tmp7 = icmp sgt i32 %tmp6, 0
+ br i1 %tmp7, label %bb9, label %bb8
+
+bb8: ; preds = %bb4
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ br label %bb9
+
+bb9: ; preds = %bb8, %bb4, %bb
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr {
+bb:
+ tail call void @_Z3foov()
+ ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1_
+; CHECK: bb9:
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+; CHECK: br label %.exitStub
+
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!7, !3, i64 0}
+!7 = !{!"_ZTS1A", !3, i64 0}
diff --git a/test/Transforms/CodeExtractor/live_shrink_multiple.ll b/test/Transforms/CodeExtractor/live_shrink_multiple.ll
new file mode 100644
index 0000000000000..8d9045c7267b1
--- /dev/null
+++ b/test/Transforms/CodeExtractor/live_shrink_multiple.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+%class.A = type { i32 }
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr {
+bb:
+ %tmp = alloca %class.A, align 4
+ %tmp1 = alloca %class.A, align 4
+ %tmp2 = bitcast %class.A* %tmp to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2)
+ %tmp3 = bitcast %class.A* %tmp1 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp3)
+ %tmp4 = load i32, i32* @cond, align 4, !tbaa !2
+ %tmp5 = icmp eq i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb7
+
+bb6: ; preds = %bb
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ br label %bb7
+
+bb7: ; preds = %bb6, %bb
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp3)
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2)
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr {
+bb:
+ tail call void @_Z3foov()
+ ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1_
+; CHECK: newFuncRoot:
+; CHECK-NEXT: alloca
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
+; CHECK-NEXT: alloca
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
+; CHECK: call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT: br label {{.*}}exitStub
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
diff --git a/test/Transforms/CodeExtractor/live_shrink_unsafe.ll b/test/Transforms/CodeExtractor/live_shrink_unsafe.ll
new file mode 100644
index 0000000000000..ea6458cc46ec8
--- /dev/null
+++ b/test/Transforms/CodeExtractor/live_shrink_unsafe.ll
@@ -0,0 +1,94 @@
+; The expected behavior of this test is likely to change once the partial
+; inlining legality check is enhanced.
+
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s | FileCheck %s
+
+%class.A = type { i32 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+@condptr = external local_unnamed_addr global i32*, align 8
+
+; Function Attrs: uwtable
+define void @_Z3foo_unknown_mem_accessv() local_unnamed_addr {
+bb:
+ %tmp = alloca %class.A, align 4
+ %tmp1 = alloca %class.A, align 4
+ %tmp2 = bitcast %class.A* %tmp to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2)
+ %tmp3 = bitcast %class.A* %tmp1 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp3)
+ %tmp4 = load i32*, i32** @condptr, align 8, !tbaa !2
+ %tmp5 = load i32, i32* %tmp4, align 4, !tbaa !6
+ %tmp6 = icmp eq i32 %tmp5, 0
+ br i1 %tmp6, label %bb7, label %bb8
+
+bb7: ; preds = %bb
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp3)
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2)
+ ret void
+}
+
+declare void @_Z3barv() local_unnamed_addr
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @_Z3foo_unknown_calli(i32 %arg) local_unnamed_addr {
+bb:
+ %tmp = alloca %class.A, align 4
+ %tmp1 = bitcast %class.A* %tmp to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+ tail call void @_Z3barv()
+ %tmp2 = icmp eq i32 %arg, 0
+ br i1 %tmp2, label %bb3, label %bb4
+
+bb3: ; preds = %bb
+ call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+ br label %bb4
+
+bb4: ; preds = %bb3, %bb
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+ ret void
+}
+
+define void @_Z3goov() local_unnamed_addr {
+; CHECK-LABEL: @_Z3goov
+; CHECK-NEXT: bb:
+; CHECK: alloca
+; CHECK: lifetime
+bb:
+ call void @_Z3foo_unknown_mem_accessv()
+ %tmp = load i32, i32* @cond, align 4, !tbaa !2
+ tail call void @_Z3foo_unknown_calli(i32 %tmp)
+ ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foo_unknown_calli.1_bb3
+; CHECK: newFuncRoot:
+; CHECK-NEXT: br label %bb3
+
+; CHECK: bb4.exitStub:
+; CHECK-NEXT: ret void
+
+; CHECK: bb3:
+; CHECK-NOT: lifetime.end
+; CHECK: br label %bb4.exitStub
+
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !4, i64 0}
diff --git a/test/Transforms/CrossDSOCFI/cfi_functions.ll b/test/Transforms/CrossDSOCFI/cfi_functions.ll
new file mode 100644
index 0000000000000..ccbde51b2115c
--- /dev/null
+++ b/test/Transforms/CrossDSOCFI/cfi_functions.ll
@@ -0,0 +1,23 @@
+; Test that types referenced in ThinLTO-style !cfi.functions are known to __cfi_check.
+; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+; RUN: opt -S -passes=cross-dso-cfi < %s | FileCheck %s
+
+; CHECK: define void @__cfi_check(
+; CHECK: switch i64
+; CHECK-NEXT: i64 1234, label
+; CHECK-NEXT: i64 5678, label
+; CHECK-NEXT: ]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!cfi.functions = !{!0, !1}
+!llvm.module.flags = !{!6}
+
+!0 = !{!"f", i8 0, !2, !4}
+!1 = !{!"g", i8 1, !3, !5}
+!2 = !{i64 0, !"typeid1"}
+!3 = !{i64 0, !"typeid2"}
+!4 = !{i64 0, i64 1234}
+!5 = !{i64 0, i64 5678}
+!6 = !{i32 4, !"Cross-DSO CFI", i32 1}
diff --git a/test/Transforms/EarlyCSE/pr33406.ll b/test/Transforms/EarlyCSE/pr33406.ll
new file mode 100644
index 0000000000000..4d3312e1f0ac2
--- /dev/null
+++ b/test/Transforms/EarlyCSE/pr33406.ll
@@ -0,0 +1,26 @@
+; RUN: opt -early-cse-memssa -S %s | FileCheck %s
+
+; CHECK: define void @patatino() {
+; CHECK: for.cond:
+; CHECK-NEXT: br i1 true, label %if.end, label %for.inc
+; CHECK: if.end:
+; CHECK-NEXT: %tinkywinky = load i32, i32* @b
+; CHECK-NEXT: br i1 true, label %for.inc, label %for.inc
+; CHECK: for.inc:
+; CHECK-NEXT: ret void
+
+
+@b = external global i32
+
+define void @patatino() {
+for.cond:
+ br i1 true, label %if.end, label %for.inc
+
+if.end:
+ %tinkywinky = load i32, i32* @b
+ store i32 %tinkywinky, i32* @b
+ br i1 true, label %for.inc, label %for.inc
+
+for.inc:
+ ret void
+}
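The transform being pinned down, restated at source level (a sketch; only the disappearance of the store and the surviving block structure matter to the test):

// EarlyCSE with MemorySSA proves the store writes back the value it just
// loaded from the same location, so the store is dead and only the load
// remains, as the CHECK-NEXT lines above expect.
int b;
void patatino() {
  int tinkywinky = b;
  b = tinkywinky; // no-op store, deleted
}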
diff --git a/test/Transforms/GVN/pr32314.ll b/test/Transforms/GVN/pr32314.ll
new file mode 100644
index 0000000000000..90d14f6fc49c0
--- /dev/null
+++ b/test/Transforms/GVN/pr32314.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -gvn < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The load in the loop must not be replaced with the value stored on the
+; previous iteration, because the store above it in the loop aliases it.
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[P_017:%.*]] = phi i32* [ undef, [[ENTRY]] ], [ [[ARRAYIDX3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[A]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: store i32 50, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[P_017]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[ARRAYIDX3]] = getelementptr inbounds [3 x i32], [3 x i32]* [[A]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 60, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 3
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+;
+entry:
+ %a = alloca [3 x i32], align 4
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+ %p.017 = phi i32* [ undef, %entry ], [ %arrayidx3, %for.body ]
+ %0 = add nsw i64 %indvars.iv, -1
+ %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* %a, i64 0, i64 %0
+ store i32 50, i32* %arrayidx, align 4
+ %1 = shl i64 %indvars.iv, 1
+ %2 = load i32, i32* %p.017, align 4
+ %3 = trunc i64 %1 to i32
+ %add1 = add nsw i32 %2, %3
+ %arrayidx3 = getelementptr inbounds [3 x i32], [3 x i32]* %a, i64 0, i64 %indvars.iv
+ store i32 60, i32* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 3
+ br i1 %exitcond, label %for.body, label %for.cond.cleanup
+}
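A rough C++ analogue of @foo, reconstructed from the IR (an assumption; %p.017 starts out undef, which the sketch replaces with &a[0] so it stays well-defined):

// The store "a[i-1] = 50" in iteration i writes the location p has pointed at
// since the previous iteration, so GVN must not forward a stale value into
// the load "*p".
void foo() {
  int a[3];
  int *p = &a[0]; // '%p.017' is undef on entry in the IR; made defined here
  for (long i = 1; i < 3; ++i) {
    a[i - 1] = 50;
    int add1 = *p + (int)(i * 2); // must observe the store just above
    (void)add1;
    a[i] = 60;
    p = &a[i];
  }
}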
diff --git a/test/Transforms/GlobalMerge/debug-info.ll b/test/Transforms/GlobalMerge/debug-info.ll
index 97e0bb2148e93..8d60f3662431c 100644
--- a/test/Transforms/GlobalMerge/debug-info.ll
+++ b/test/Transforms/GlobalMerge/debug-info.ll
@@ -17,7 +17,7 @@ define void @use1() {
; CHECK: [[AVAR]] = !DIGlobalVariable(name: "a", scope: null, isLocal: false, isDefinition: true)
; CHECK: [[B]] = !DIGlobalVariableExpression(var: [[BVAR:![0-9]+]], expr: [[EXPR:![0-9]+]])
; CHECK: [[BVAR]] = !DIGlobalVariable(name: "b", scope: null, isLocal: false, isDefinition: true)
-; CHECK: [[EXPR]] = !DIExpression(DW_OP_plus, 4)
+; CHECK: [[EXPR]] = !DIExpression(DW_OP_plus_uconst, 4)
!llvm.module.flags = !{!4, !5}
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
index 5366b5a16cc77..791eb94779b70 100644
--- a/test/Transforms/Inline/always-inline.ll
+++ b/test/Transforms/Inline/always-inline.ll
@@ -305,3 +305,14 @@ entry:
ret void
; CHECK: ret void
}
+
+define void @inner14() readnone nounwind {
+; CHECK: define void @inner14
+ ret void
+}
+
+define void @outer14() {
+; CHECK: call void @inner14
+ call void @inner14()
+ ret void
+}
diff --git a/test/Transforms/InstCombine/debuginfo-dce.ll b/test/Transforms/InstCombine/debuginfo-dce.ll
index 086743e80820b..50b8f1c6068e1 100644
--- a/test/Transforms/InstCombine/debuginfo-dce.ll
+++ b/test/Transforms/InstCombine/debuginfo-dce.ll
@@ -93,12 +93,12 @@ entry:
ret void, !dbg !32
}
-; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus, 0)
-; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus, 0)
-; CHECK: ![[GEP0_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_plus, 0, DW_OP_stack_value)
-; CHECK: ![[GEP1_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_stack_value,
+; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 0)
+; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus_uconst, 0)
+; CHECK: ![[GEP0_EXPR]] = !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_plus_uconst, 0, DW_OP_stack_value)
+; CHECK: ![[GEP1_EXPR]] = !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value,
; CHECK-SAME: DW_OP_LLVM_fragment, 0, 32)
-; CHECK: ![[GEP2_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_stack_value)
+; CHECK: ![[GEP2_EXPR]] = !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value)
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
@@ -130,7 +130,7 @@ attributes #1 = { nounwind readnone }
!17 = !{!18}
!18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4)
!19 = !DILocation(line: 6, column: 17, scope: !14)
-!20 = !DIExpression(DW_OP_plus, 0)
+!20 = !DIExpression(DW_OP_plus_uconst, 0)
!21 = !DILocation(line: 11, column: 1, scope: !14)
!22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17)
!23 = !DILocation(line: 6, column: 17, scope: !22)
diff --git a/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll b/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll
index 107440f10a5a2..230ac1796671f 100644
--- a/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll
+++ b/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll
@@ -1,10 +1,11 @@
; RUN: opt -instcombine -unfold-element-atomic-memcpy-max-elements=8 -S < %s | FileCheck %s
+; Temporarily an expected failure until instcombine is updated in the next patch.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; Test basic unfolding
-define void @test1(i8* %Src, i8* %Dst) {
-; CHECK-LABEL: test1
-; CHECK-NOT: llvm.memcpy.element.atomic
+; Test basic unfolding -- unordered load & store
+define void @test1a(i8* %Src, i8* %Dst) {
+; CHECK-LABEL: test1a
+; CHECK-NOT: llvm.memcpy.element.unordered.atomic
; CHECK-DAG: %memcpy_unfold.src_casted = bitcast i8* %Src to i32*
; CHECK-DAG: %memcpy_unfold.dst_casted = bitcast i8* %Dst to i32*
@@ -21,7 +22,7 @@ define void @test1(i8* %Src, i8* %Dst) {
; CHECK-DAG: [[VAL4:%[^\s]+]] = load atomic i32, i32* %{{[^\s]+}} unordered, align 4
; CHECK-DAG: store atomic i32 [[VAL4]], i32* %{{[^\s]+}} unordered, align 4
entry:
- call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 8 %Src, i64 4, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %Dst, i8* align 4 %Src, i32 16, i32 4)
ret void
}
@@ -31,9 +32,9 @@ define void @test2(i8* %Src, i8* %Dst) {
; CHECK-NOT: load
; CHECK-NOT: store
-; CHECK: llvm.memcpy.element.atomic
+; CHECK: llvm.memcpy.element.unordered.atomic
entry:
- call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 4 %Src, i64 1000, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %Dst, i8* align 4 %Src, i32 256, i32 4)
ret void
}
@@ -43,16 +44,16 @@ define void @test3(i8* %Src, i8* %Dst) {
; CHECK-NOT: load
; CHECK-NOT: store
-; CHECK: llvm.memcpy.element.atomic
+; CHECK: llvm.memcpy.element.unordered.atomic
entry:
- call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 4, i32 64)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 64 %Dst, i8* align 64 %Src, i32 64, i32 64)
ret void
}
; Test that we will eliminate redundant bitcasts
define void @test4(i64* %Src, i64* %Dst) {
; CHECK-LABEL: test4
-; CHECK-NOT: llvm.memcpy.element.atomic
+; CHECK-NOT: llvm.memcpy.element.unordered.atomic
; CHECK-NOT: bitcast
@@ -76,17 +77,18 @@ define void @test4(i64* %Src, i64* %Dst) {
entry:
%Src.casted = bitcast i64* %Src to i8*
%Dst.casted = bitcast i64* %Dst to i8*
- call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dst.casted, i8* align 16 %Src.casted, i64 4, i32 8)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %Dst.casted, i8* align 16 %Src.casted, i32 32, i32 8)
ret void
}
+; Test that 0-length unordered atomic memcpy gets removed.
define void @test5(i8* %Src, i8* %Dst) {
; CHECK-LABEL: test5
-; CHECK-NOT: llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 0, i32 64)
+; CHECK-NOT: llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 64 %Dst, i8* align 64 %Src, i32 0, i32 8)
entry:
- call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 0, i32 64)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 64 %Dst, i8* align 64 %Src, i32 0, i32 8)
ret void
}
-declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* nocapture, i8* nocapture, i64, i32)
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
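One detail behind the rewritten call sites above: the old llvm.memcpy.element.atomic took an element count, while the new llvm.memcpy.element.unordered.atomic takes a byte length (still required to be a multiple of the element size). A minimal sketch of the conversion for the tests whose operands translate directly; test2 and test3 also had their sizes changed outright, so they are omitted:

#include <cassert>
#include <cstdint>

// Byte length passed to the new intrinsic, given the old intrinsic's
// (element count, element size) pair.
uint32_t byte_length(uint32_t num_elements, uint32_t element_size) {
  return num_elements * element_size;
}

int main() {
  assert(byte_length(4, 4) == 16); // test1a: "i64 4" became "i32 16"
  assert(byte_length(4, 8) == 32); // test4:  "i64 4" became "i32 32"
  return 0;
}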
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index d27fb5d89f092..af4ee85216ef2 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -1,12 +1,12 @@
; Test that the ffs* library call simplifier works correctly.
;
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-FFS
-; RUN: opt -instcombine -mtriple=arm64-apple-ios9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
-; RUN: opt -instcombine -mtriple=arm64-apple-tvos9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
-; RUN: opt -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
-; RUN: opt -instcombine -mtriple=x86_64-apple-macosx10.11 -S %s | FileCheck --check-prefix=CHECK-FFS %s
-; RUN: opt -instcombine -mtriple=x86_64-freebsd-gnu -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
+; RUN: opt < %s -instcombine -mtriple i386-pc-linux -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=arm64-apple-ios9.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=arm64-apple-tvos9.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx10.11 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=x86_64-freebsd-gnu -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
declare i32 @ffs(i32)
declare i32 @ffsl(i32)
@@ -15,123 +15,179 @@ declare i32 @ffsll(i64)
; Check ffs(0) -> 0.
define i32 @test_simplify1() {
-; CHECK-LABEL: @test_simplify1(
+; ALL-LABEL: @test_simplify1(
+; ALL-NEXT: ret i32 0
+;
%ret = call i32 @ffs(i32 0)
ret i32 %ret
-; CHECK-NEXT: ret i32 0
}
define i32 @test_simplify2() {
-; CHECK-FFS-LABEL: @test_simplify2(
+; GENERIC-LABEL: @test_simplify2(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 0)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify2(
+; TARGET-NEXT: ret i32 0
+;
%ret = call i32 @ffsl(i32 0)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 0
}
define i32 @test_simplify3() {
-; CHECK-FFS-LABEL: @test_simplify3(
+; GENERIC-LABEL: @test_simplify3(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 0)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify3(
+; TARGET-NEXT: ret i32 0
+;
%ret = call i32 @ffsll(i64 0)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 0
}
; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
define i32 @test_simplify4() {
-; CHECK-LABEL: @test_simplify4(
+; ALL-LABEL: @test_simplify4(
+; ALL-NEXT: ret i32 1
+;
%ret = call i32 @ffs(i32 1)
ret i32 %ret
-; CHECK-NEXT: ret i32 1
}
define i32 @test_simplify5() {
-; CHECK-LABEL: @test_simplify5(
+; ALL-LABEL: @test_simplify5(
+; ALL-NEXT: ret i32 12
+;
%ret = call i32 @ffs(i32 2048)
ret i32 %ret
-; CHECK-NEXT: ret i32 12
}
define i32 @test_simplify6() {
-; CHECK-LABEL: @test_simplify6(
+; ALL-LABEL: @test_simplify6(
+; ALL-NEXT: ret i32 17
+;
%ret = call i32 @ffs(i32 65536)
ret i32 %ret
-; CHECK-NEXT: ret i32 17
}
define i32 @test_simplify7() {
-; CHECK-FFS-LABEL: @test_simplify7(
+; GENERIC-LABEL: @test_simplify7(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 65536)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify7(
+; TARGET-NEXT: ret i32 17
+;
%ret = call i32 @ffsl(i32 65536)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 17
}
define i32 @test_simplify8() {
-; CHECK-FFS-LABEL: @test_simplify8(
+; GENERIC-LABEL: @test_simplify8(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 1024)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify8(
+; TARGET-NEXT: ret i32 11
+;
%ret = call i32 @ffsll(i64 1024)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 11
}
define i32 @test_simplify9() {
-; CHECK-FFS-LABEL: @test_simplify9(
+; GENERIC-LABEL: @test_simplify9(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 65536)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify9(
+; TARGET-NEXT: ret i32 17
+;
%ret = call i32 @ffsll(i64 65536)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 17
}
define i32 @test_simplify10() {
-; CHECK-FFS-LABEL: @test_simplify10(
+; GENERIC-LABEL: @test_simplify10(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 17179869184)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify10(
+; TARGET-NEXT: ret i32 35
+;
%ret = call i32 @ffsll(i64 17179869184)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 35
}
define i32 @test_simplify11() {
-; CHECK-FFS-LABEL: @test_simplify11(
+; GENERIC-LABEL: @test_simplify11(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 281474976710656)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify11(
+; TARGET-NEXT: ret i32 49
+;
%ret = call i32 @ffsll(i64 281474976710656)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 49
}
define i32 @test_simplify12() {
-; CHECK-FFS-LABEL: @test_simplify12(
+; GENERIC-LABEL: @test_simplify12(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 1152921504606846976)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify12(
+; TARGET-NEXT: ret i32 61
+;
%ret = call i32 @ffsll(i64 1152921504606846976)
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 61
}
; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
define i32 @test_simplify13(i32 %x) {
-; CHECK-LABEL: @test_simplify13(
+; ALL-LABEL: @test_simplify13(
+; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
+; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0
+; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+; ALL-NEXT: ret i32 [[TMP3]]
+;
%ret = call i32 @ffs(i32 %x)
-; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
-; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
-; CHECK-NEXT: ret i32 [[RET]]
}
define i32 @test_simplify14(i32 %x) {
-; CHECK-FFS-LABEL: @test_simplify14(
+; GENERIC-LABEL: @test_simplify14(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 %x)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify14(
+; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
+; TARGET-NEXT: [[TMP2:%.*]] = icmp ne i32 %x, 0
+; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+; TARGET-NEXT: ret i32 [[TMP3]]
+;
%ret = call i32 @ffsl(i32 %x)
-; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
-; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
-; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 [[RET]]
}
define i32 @test_simplify15(i64 %x) {
-; CHECK-FFS-LABEL: @test_simplify15(
+; GENERIC-LABEL: @test_simplify15(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 %x)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify15(
+; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1
+; TARGET-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; TARGET-NEXT: [[TMP3:%.*]] = icmp ne i64 %x, 0
+; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
+; TARGET-NEXT: ret i32 [[TMP4]]
+;
%ret = call i32 @ffsll(i64 %x)
-; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
-; CHECK-FFS-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
-; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
-; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
ret i32 %ret
-; CHECK-FFS-NEXT: ret i32 [[RET]]
}
+
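For reference, the POSIX ffs semantics these folds encode, as a runnable model (assumes a GCC/Clang-style __builtin_ctz; the GENERIC prefix keeps the ffsl/ffsll calls because the simplifier only trusts those symbols on known target triples):

#include <cassert>

// Reference model of the fold "ffs(x) -> x ? cttz(x) + 1 : 0".
int ffs_ref(int x) {
  return x ? __builtin_ctz((unsigned)x) + 1 : 0; // ctz(0) is UB, hence the guard
}

int main() {
  assert(ffs_ref(0) == 0);      // test_simplify1
  assert(ffs_ref(1) == 1);      // test_simplify4
  assert(ffs_ref(2048) == 12);  // test_simplify5: 2048 == 1 << 11
  assert(ffs_ref(65536) == 17); // test_simplify6: 65536 == 1 << 16
  return 0;
}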
diff --git a/test/Transforms/InstCombine/lshr.ll b/test/Transforms/InstCombine/lshr.ll
index 71b25177162b9..4cdcb98f730c1 100644
--- a/test/Transforms/InstCombine/lshr.ll
+++ b/test/Transforms/InstCombine/lshr.ll
@@ -122,10 +122,19 @@ define <2 x i8> @bool_zext_splat(<2 x i1> %x) {
ret <2 x i8> %hibit
}
-; FIXME: The replicated sign bits are all that's left. This could be ashr+zext.
-
-define i16 @smear_sign_and_widen(i4 %x) {
+define i32 @smear_sign_and_widen(i8 %x) {
; CHECK-LABEL: @smear_sign_and_widen(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 %x, 7
+; CHECK-NEXT: [[HIBIT:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[HIBIT]]
+;
+ %sext = sext i8 %x to i32
+ %hibit = lshr i32 %sext, 24
+ ret i32 %hibit
+}
+
+define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) {
+; CHECK-LABEL: @smear_sign_and_widen_should_not_change_type(
; CHECK-NEXT: [[SEXT:%.*]] = sext i4 %x to i16
; CHECK-NEXT: [[HIBIT:%.*]] = lshr i16 [[SEXT]], 12
; CHECK-NEXT: ret i16 [[HIBIT]]
@@ -137,8 +146,8 @@ define i16 @smear_sign_and_widen(i4 %x) {
define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) {
; CHECK-LABEL: @smear_sign_and_widen_splat(
-; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i6> %x to <2 x i8>
-; CHECK-NEXT: [[HIBIT:%.*]] = lshr <2 x i8> [[SEXT]], <i8 2, i8 2>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> %x, <i6 2, i6 2>
+; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8>
; CHECK-NEXT: ret <2 x i8> [[HIBIT]]
;
%sext = sext <2 x i6> %x to <2 x i8>
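The new @smear_sign_and_widen expectation restated in C++ (a sketch; assumes arithmetic right shift of negative values, which Clang and GCC provide). The i4 variant keeps sext+lshr, as its new name indicates the transform must not change the type:

#include <cassert>
#include <cstdint>

// Before: replicate the sign bit by sext, then logical shift right.
uint32_t smear_before(int8_t x) { return (uint32_t)(int32_t)x >> 24; }
// After: ashr + zext, as the new CHECK lines expect.
uint32_t smear_after(int8_t x) { return (uint8_t)(x >> 7); }

int main() {
  for (int v = -128; v <= 127; ++v)
    assert(smear_before((int8_t)v) == smear_after((int8_t)v));
  return 0;
}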
diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll
index 496d847b5321e..47a4ca4b628bf 100644
--- a/test/Transforms/InstCombine/onehot_merge.ll
+++ b/test/Transforms/InstCombine/onehot_merge.ll
@@ -33,3 +33,79 @@ bb:
ret i1 %or
}
+; Same as above, but with the operands commuted in one of the ands and not the other.
+define i1 @foo1_and_commuted(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @foo1_and_commuted(
+; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]]
+; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[TMP]], [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K2]], [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %k2 = mul i32 %k, %k ; to trick the complexity sorting
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %k2, %tmp
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k2
+ %tmp6 = icmp eq i32 %tmp5, 0
+ %or = or i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+define i1 @or_consts(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @or_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K:%.*]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 12
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 4, %k
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp5 = and i32 8, %k
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %or = and i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+define i1 @foo1_or(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @foo1_or(
+; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP]], [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %tmp, %k
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %or = and i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+; Same as above, but with the operands commuted in one of the ors and not the other.
+define i1 @foo1_or_commuted(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @foo1_or_commuted(
+; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]]
+; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP]], [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[K2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %k2 = mul i32 %k, %k ; to trick the complexity sorting
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %k2, %tmp
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k2
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %or = and i1 %tmp2, %tmp6
+ ret i1 %or
+}
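What the new tests pin down, stated over concrete one-hot masks and checked exhaustively for small values (a sketch; m1 and m2 mirror the constants in @or_consts, and the shl/lshr-of-signbit operands in the other tests are one-hot by construction):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t m1 = 1u << 2, m2 = 1u << 3; // 4 and 8, as in @or_consts
  for (uint32_t k = 0; k < 64; ++k) {
    // "Some masked bit is missing" ...
    bool before = ((k & m1) == 0) || ((k & m2) == 0);
    // ... merges into a single and + icmp, as the CHECK lines expect.
    bool after = (k & (m1 | m2)) != (m1 | m2);
    assert(before == after);
  }
  return 0;
}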
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index f2bc290d79a45..485f9612376ad 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -114,6 +114,17 @@ define i32 @test10(i32 %A, i32 %B) {
ret i32 %or
}
+define i32 @test10_commuted(i32 %A, i32 %B) {
+; CHECK-LABEL: @test10_commuted(
+; CHECK-NEXT: ret i32 -1
+;
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %or = or i32 %xor2, %xor1
+ ret i32 %or
+}
+
; (x | y) & ((~x) ^ y) -> (x & y)
define i32 @test11(i32 %x, i32 %y) {
; CHECK-LABEL: @test11(
@@ -300,3 +311,36 @@ define i8 @or_xor_or(i8 %x) {
ret i8 %or2
}
+define i8 @test17(i8 %A, i8 %B) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %or = or i8 %xor1, %xor2
+ %res = mul i8 %or, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
+
+define i8 @test18(i8 %A, i8 %B) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR2]], [[XOR1]]
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %or = or i8 %xor2, %xor1
+ %res = mul i8 %or, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
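Why test10_commuted folds to -1 while test17/test18 must keep their instructions, checked exhaustively at i8 width (a sketch; 33 stands in for any constant other than -1):

#include <cassert>
#include <cstdint>

int main() {
  bool always_ones_with_33 = true;
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = (uint8_t)a, B = (uint8_t)b;
      uint8_t x = (uint8_t)(B ^ A);
      // test10: (A ^ -1) ^ B is ~(A ^ B), and x | ~x is all ones.
      assert((uint8_t)(x | (uint8_t)((A ^ 0xFFu) ^ B)) == 0xFF);
      // test17/test18: with 33 in place of -1, the or is not constant.
      if ((uint8_t)(x | (uint8_t)((A ^ 33u) ^ B)) != 0xFF)
        always_ones_with_33 = false;
    }
  assert(!always_ones_with_33);
  return 0;
}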
diff --git a/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/test/Transforms/InstCombine/select-with-bitwise-ops.ll
index 68b73af21a8d6..faeb4e046aca8 100644
--- a/test/Transforms/InstCombine/select-with-bitwise-ops.ll
+++ b/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "n8:16:32:64"
+
define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) {
; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2(
; CHECK-NEXT: [[AND:%.*]] = shl i32 %x, 1
@@ -295,3 +297,269 @@ define i32 @test67(i16 %x) {
ret i32 %3
}
+define i32 @test68(i32 %x, i32 %y) {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define i32 @test69(i32 %x, i32 %y) {
+; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp ne i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define i32 @shift_no_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_no_xor_multiuse_or(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+; TODO this increased the number of instructions
+define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_xor_multiuse_or(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2048
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+; TODO this increased the number of instructions
+define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+; TODO this increased the number of instructions
+define i32 @shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+; TODO this increased the number of instructions
+define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+; TODO this increased the number of instructions
+define i32 @shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
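The test68/test69 rewrite restated in C++ (a sketch; the datalayout line added at the top of the file marks i8 as a native width, which is what makes the trunc acceptable): testing bit 7 of %x becomes a sign test on the truncated low byte.

#include <cassert>
#include <cstdint>

uint32_t before(uint32_t x, uint32_t y) {
  return (x & 128) == 0 ? y : (y | 2);      // and + icmp eq + or + select
}
uint32_t after(uint32_t x, uint32_t y) {
  // trunc to i8 + "icmp sgt ... -1"; modular narrowing assumed (universal on
  // two's-complement targets, and guaranteed since C++20).
  return (int8_t)(uint8_t)x >= 0 ? y : (y | 2);
}

int main() {
  for (uint32_t x = 0; x < 512; ++x)
    assert(before(x, 5) == after(x, 5));
  return 0;
}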
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index ce8e2fcd38b9b..68bbf35d1e65a 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -1306,3 +1306,13 @@ define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) {
ret <2 x i8> %shr
}
+; Make sure known-bits analysis works correctly with non-power-of-2 bit widths.
+define i7 @test65(i7 %a, i7 %b) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: ret i7 0
+;
+ %shiftamt = and i7 %b, 6 ; this ensures the shift amount is even and less than the bit width.
+ %x = lshr i7 42, %shiftamt ; 42 has a zero in every even numbered bit and a one in every odd bit.
+ %y = and i7 %x, 1 ; this extracts the lsb which should be 0 because we shifted an even number of bits and all even bits of the shift input are 0.
+ ret i7 %y
+}
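The reasoning spelled out in @test65's comments, verified exhaustively (a sketch):

#include <cassert>

int main() {
  // 42 is 0b0101010 in i7: every even-numbered bit is zero. "%b & 6" makes
  // the shift amount even and below the bit width, so bit 0 of the result is
  // always zero, which is why the whole function folds to "ret i7 0".
  for (int b = 0; b < 128; ++b) { // all i7 values of %b
    int shiftamt = b & 6;
    assert(((42 >> shiftamt) & 1) == 0);
  }
  return 0;
}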
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 3afbf632f6e19..49e6b999fbce2 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -325,3 +325,36 @@ define i32 @test14(i32 %a, i32 %b, i32 %c) {
ret i32 %xor
}
+define i8 @test15(i8 %A, i8 %B) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %and = and i8 %xor1, %xor2
+ %res = mul i8 %and, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
+
+define i8 @test16(i8 %A, i8 %B) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR2]], [[XOR1]]
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %and = and i8 %xor2, %xor1
+ %res = mul i8 %and, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
diff --git a/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
index ec93847178b58..d52378b864ff9 100644
--- a/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
+++ b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
;; memcpy.atomic formation (atomic load & store)
define void @test1(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -30,7 +30,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation (atomic store, normal load)
define void @test2(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -55,7 +55,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (atomic store, normal load w/ no align)
define void @test2b(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2b(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -80,7 +80,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
define void @test2c(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2c(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -105,7 +105,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
define void @test2d(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2d(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -131,7 +131,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation (normal store, atomic load)
define void @test3(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -156,7 +156,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (normal store w/ no align, atomic load)
define void @test3b(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3b(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -181,7 +181,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
define void @test3c(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3c(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -206,7 +206,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
define void @test3d(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3d(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -232,7 +232,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
define void @test4(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test4(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -257,7 +257,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
define void @test5(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test5(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
@@ -282,7 +282,8 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation (atomic load & store) -- element size 2
define void @test6(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 %Size, i32 2)
+; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 1
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2)
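+; The unordered.atomic intrinsic takes its length in bytes rather than in
+; elements, so the element count is scaled by the element size (shl 1 == x2).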
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -307,7 +308,8 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation (atomic load & store) -- element size 4
define void @test7(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test7(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 %Size, i32 4)
+; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 2
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -332,7 +334,8 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation (atomic load & store) -- element size 8
define void @test8(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test8(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 %Size, i32 8)
+; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 3
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -357,7 +360,8 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (atomic load & store) -- element size 16
define void @test9(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test9(
-; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 %Size, i32 16)
+; CHECK: [[Sz:%[0-9]+]] = shl i64 %Size, 4
+; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
@@ -382,7 +386,7 @@ for.end: ; preds = %for.body, %entry
;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
define void @test10(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test10(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
diff --git a/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
index b2528f1c24577..341a7a0baebf0 100644
--- a/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
+++ b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;; Will not create call due to a max element size of 0
define void @test1(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
-; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
diff --git a/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml
new file mode 100644
index 0000000000000..17b634acd0e1a
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml
@@ -0,0 +1,19 @@
+---
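+# TTRes records how a type test resolves on import: AllOnes means every
+# aligned address in the range passes, Single means exactly one address does;
+# SizeM1BitWidth is the bit width of the size-minus-one constant.
+# The does_not_exist entry below has no matching IR function and is expected
+# to be ignored.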
+TypeIdMap:
+ typeid1:
+ TTRes:
+ Kind: AllOnes
+ SizeM1BitWidth: 7
+ typeid2:
+ TTRes:
+ Kind: Single
+ SizeM1BitWidth: 0
+WithGlobalValueDeadStripping: false
+CfiFunctionDefs:
+ - local_a
+ - local_b
+ - does_not_exist
+CfiFunctionDecls:
+ - external
+ - external_weak
+...
diff --git a/test/Transforms/LowerTypeTests/export-icall.ll b/test/Transforms/LowerTypeTests/export-icall.ll
new file mode 100644
index 0000000000000..ad36048993067
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/export-icall.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @h(i8 %x) !type !2 {
+ ret void
+}
+
+declare !type !8 void @f(i32 %x)
+
+!cfi.functions = !{!0, !1, !3, !4, !5, !6}
+
+; declaration of @h with a different type is ignored
+!0 = !{!"h", i8 1, !7}
+
+; extern_weak declaration of @h with a different type is ignored as well
+!1 = !{!"h", i8 2, !8}
+!2 = !{i64 0, !"typeid1"}
+
+; definition of @f replaces types on the IR declaration above
+!3 = !{!"f", i8 0, !2}
+!4 = !{!"external", i8 1, !2}
+!5 = !{!"external_weak", i8 2, !2}
+!6 = !{!"g", i8 0, !7}
+!7 = !{i64 0, !"typeid2"}
+!8 = !{i64 0, !"typeid3"}
+
+
+; CHECK-DAG: @__typeid_typeid1_global_addr = hidden alias i8, bitcast (void ()* [[JT1:.*]] to i8*)
+; CHECK-DAG: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 3 to i8*)
+; CHECK-DAG: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 3 to i8*)
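+; The alignment (log2, here 3) and size-minus-one (3) are exported as absolute
+; symbols via inttoptr so importing modules can reference them at link time.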
+
+; CHECK-DAG: @h = alias void (i8), bitcast (void ()* [[JT1]] to void (i8)*)
+; CHECK-DAG: @f = alias void (i32), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+; CHECK-DAG: @external.cfi_jt = hidden alias void (), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+; CHECK-DAG: @external_weak.cfi_jt = hidden alias void (), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+
+; CHECK-DAG: @__typeid_typeid2_global_addr = hidden alias i8, bitcast (void ()* [[JT2:.*]] to i8*)
+
+; CHECK-DAG: @g = alias void (), void ()* [[JT2]]
+
+; CHECK-DAG: define internal void @h.cfi(i8 {{.*}}) !type !{{.*}}
+; CHECK-DAG: declare !type !{{.*}} void @external()
+; CHECK-DAG: declare !type !{{.*}} void @external_weak()
+; CHECK-DAG: declare !type !{{.*}} void @f.cfi(i32)
+; CHECK-DAG: declare !type !{{.*}} void @g.cfi()
+
+
+; SUMMARY: TypeIdMap:
+; SUMMARY-NEXT: typeid1:
+; SUMMARY-NEXT: TTRes:
+; SUMMARY-NEXT: Kind: AllOnes
+; SUMMARY-NEXT: SizeM1BitWidth: 7
+; SUMMARY-NEXT: WPDRes:
+; SUMMARY-NEXT: typeid2:
+; SUMMARY-NEXT: TTRes:
+; SUMMARY-NEXT: Kind: Single
+; SUMMARY-NEXT: SizeM1BitWidth: 0
+; SUMMARY-NEXT: WPDRes:
+
+; SUMMARY: CfiFunctionDefs:
+; SUMMARY-NEXT: - f
+; SUMMARY-NEXT: - g
+; SUMMARY-NEXT: - h
+; SUMMARY-NEXT: CfiFunctionDecls:
+; SUMMARY-NEXT: - external
+; SUMMARY-NEXT: - external_weak
+; SUMMARY-NEXT: ...
diff --git a/test/Transforms/LowerTypeTests/import-icall.ll b/test/Transforms/LowerTypeTests/import-icall.ll
new file mode 100644
index 0000000000000..ddeb7fb5c9a2b
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/import-icall.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import-icall.yaml < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8 @local_a() {
+ call void @external()
+ call void @external_weak()
+ ret i8 1
+}
+
+define internal i8 @local_b() {
+ %x = call i8 @local_a()
+ ret i8 %x
+}
+
+define i8 @use_b() {
+ %x = call i8 @local_b()
+ ret i8 %x
+}
+
+
+declare void @external()
+declare extern_weak void @external_weak()
+
+; CHECK: define hidden i8 @local_a.cfi() {
+; CHECK-NEXT: call void @external.cfi_jt()
+; CHECK-NEXT: call void select (i1 icmp ne (void ()* @external_weak, void ()* null), void ()* @external_weak.cfi_jt, void ()* null)()
+; CHECK-NEXT: ret i8 1
+; CHECK-NEXT: }
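+; An extern_weak symbol may resolve to null, so the rewritten call selects
+; between the jump-table alias and null instead of calling the jump-table
+; entry unconditionally.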
+
+; internal @local_b is not the same function as "local_b" in the summary.
+; CHECK: define internal i8 @local_b() {
+; CHECK-NEXT: call i8 @local_a()
+
+; CHECK: declare void @external()
+; CHECK: declare extern_weak void @external_weak()
+; CHECK: declare i8 @local_a()
+; CHECK: declare hidden void @external.cfi_jt()
+; CHECK: declare hidden void @external_weak.cfi_jt()
diff --git a/test/Transforms/PGOProfile/memop_size_opt.ll b/test/Transforms/PGOProfile/memop_size_opt.ll
index 19a2b7ed293b2..e11f235a48e76 100644
--- a/test/Transforms/PGOProfile/memop_size_opt.ll
+++ b/test/Transforms/PGOProfile/memop_size_opt.ll
@@ -38,7 +38,7 @@ for.body3:
; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 1, i1 false)
; MEMOP_OPT: br label %[[MERGE_LABEL:.*]]
; MEMOP_OPT: [[DEFAULT_LABEL]]:
-; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false){{[[:space:]]}}
+; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false), !prof [[NEWVP:![0-9]+]]
; MEMOP_OPT: br label %[[MERGE_LABEL]]
; MEMOP_OPT: [[MERGE_LABEL]]:
; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL2:.*]] [
@@ -48,11 +48,16 @@ for.body3:
; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src2, i64 1, i32 1, i1 false)
; MEMOP_OPT: br label %[[MERGE_LABEL2:.*]]
; MEMOP_OPT: [[DEFAULT_LABEL2]]:
-; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src2, i64 %conv, i32 1, i1 false){{[[:space:]]}}
+; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src2, i64 %conv, i32 1, i1 false), !prof [[NEWVP]]
; MEMOP_OPT: br label %[[MERGE_LABEL2]]
; MEMOP_OPT: [[MERGE_LABEL2]]:
; MEMOP_OPT: br label %for.inc
; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99}
+; Should be 457 total left (original total count 556, minus 99 from the
+; specialized value 1, which is removed from the VP array). Also, we only end
+; up with 5 total values, since the default max number of promotions is 5 and
+; the rest of the values are therefore ignored when extracting the VP metadata.
+; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66}
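+; ("VP" layout: kind, total count, then value/count pairs; kind 1 is the
+; memop-size profile, so the pairs above read (size, count).)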
for.inc:
%inc = add nsw i32 %j.0, 1
diff --git a/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll b/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll
new file mode 100644
index 0000000000000..105afa9def5c1
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll
@@ -0,0 +1,92 @@
+; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
+
+; This test checks that metadata that is invalid after RS4GC is dropped.
+; We can miscompile if optimizations scheduled after RS4GC use metadata
+; that is in fact invalid.
+
+declare void @bar()
+
+declare void @baz(i32)
+; Confirm that the %loadedval load no longer carries invariant.load metadata
+; but retains the range metadata.
+; Because %loadedval is no longer marked invariant, LICM cannot incorrectly
+; sink it and create an unrelocated use of %baseaddr.
+define void @test_invariant_load() gc "statepoint-example" {
+; CHECK-LABEL: @test_invariant_load
+; CHECK: %loadedval = load i32, i32 addrspace(1)* %baseaddr, align 8, !range !0
+bb:
+ br label %outerloopHdr
+
+outerloopHdr: ; preds = %bb6, %bb
+ %baseaddr = phi i32 addrspace(1)* [ undef, %bb ], [ %tmp4, %bb6 ]
+; LICM may sink this load to the exit block after RS4GC because it is tagged invariant.
+ %loadedval = load i32, i32 addrspace(1)* %baseaddr, align 8, !range !0, !invariant.load !1
+ br label %innerloopHdr
+
+innerloopHdr: ; preds = %innerlooplatch, %outerloopHdr
+ %tmp4 = phi i32 addrspace(1)* [ %baseaddr, %outerloopHdr ], [ %gep, %innerlooplatch ]
+ br label %innermostloophdr
+
+innermostloophdr: ; preds = %bb6, %innerloopHdr
+ br i1 undef, label %exitblock, label %bb6
+
+bb6: ; preds = %innermostloophdr
+ switch i32 undef, label %innermostloophdr [
+ i32 0, label %outerloopHdr
+ i32 1, label %innerlooplatch
+ ]
+
+innerlooplatch: ; preds = %bb6
+ call void @bar()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %tmp4, i64 8
+ br label %innerloopHdr
+
+exitblock: ; preds = %innermostloophdr
+ %tmp13 = add i32 42, %loadedval
+ call void @baz(i32 %tmp13)
+ unreachable
+}
+
+; Check that the noalias metadata is dropped: once the statepoint can relocate
+; pointers, aliasing scopes established beforehand can no longer be trusted.
+define void @test_noalias(i32 %x, i32 addrspace(1)* %p, i32 addrspace(1)* %q) gc "statepoint-example" {
+; CHECK-LABEL: test_noalias
+; CHECK: %y = load i32, i32 addrspace(1)* %q, align 16
+; CHECK: gc.statepoint
+; CHECK: %p.relocated
+; CHECK-NEXT: %p.relocated.casted = bitcast i8 addrspace(1)* %p.relocated to i32 addrspace(1)*
+; CHECK-NEXT: store i32 %x, i32 addrspace(1)* %p.relocated.casted, align 16
+entry:
+ %y = load i32, i32 addrspace(1)* %q, align 16, !noalias !3
+ call void @baz(i32 %x)
+ store i32 %x, i32 addrspace(1)* %p, align 16, !noalias !4
+ ret void
+}
+
+; Check that the dereferenceable metadata is dropped: dereferenceability
+; established before the safepoint may no longer hold afterwards.
+define void @test_dereferenceable(i32 addrspace(1)* addrspace(1)* %p, i32 %x, i32 addrspace(1)* %q) gc "statepoint-example" {
+; CHECK-LABEL: test_dereferenceable
+; CHECK: %v1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p
+; CHECK-NEXT: %v2 = load i32, i32 addrspace(1)* %v1
+; CHECK: gc.statepoint
+ %v1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p, !dereferenceable !5
+ %v2 = load i32, i32 addrspace(1)* %v1
+ call void @baz(i32 %x)
+ store i32 %v2, i32 addrspace(1)* %q, align 16
+ ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64, i32, void (i32)*, i32, i32, ...)
+
+; Function Attrs: nounwind readonly
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #0
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+attributes #0 = { nounwind readonly }
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{}
+!2 = !{i32 10, i32 1}
+!3 = !{!3}
+!4 = !{!4}
+!5 = !{i64 8}
diff --git a/test/Transforms/SLPVectorizer/X86/arith-add.ll b/test/Transforms/SLPVectorizer/X86/arith-add.ll
index 0266758b27d23..22b2c7422933b 100644
--- a/test/Transforms/SLPVectorizer/X86/arith-add.ll
+++ b/test/Transforms/SLPVectorizer/X86/arith-add.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
@@ -38,6 +39,25 @@ define void @add_v8i64() {
; SSE-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
; SSE-NEXT: ret void
;
+; SLM-LABEL: @add_v8i64(
+; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = add <2 x i64> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = add <2 x i64> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
+; SLM-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @add_v8i64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -111,6 +131,25 @@ define void @add_v16i32() {
; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
+; SLM-LABEL: @add_v16i32(
+; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @add_v16i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
@@ -216,6 +255,25 @@ define void @add_v32i16() {
; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
; SSE-NEXT: ret void
;
+; SLM-LABEL: @add_v32i16(
+; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = add <8 x i16> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = add <8 x i16> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = add <8 x i16> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @add_v32i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
diff --git a/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/test/Transforms/SLPVectorizer/X86/arith-fp.ll
index e00ed849ee4b5..119cf594c905d 100644
--- a/test/Transforms/SLPVectorizer/X86/arith-fp.ll
+++ b/test/Transforms/SLPVectorizer/X86/arith-fp.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
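+; Silvermont gets its own prefix: its cost model rates vector fdiv as very
+; expensive, so the SLP vectorizer is expected to keep fdiv scalar there.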
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
@@ -69,13 +70,32 @@ define <2 x double> @buildvector_mul_2f64(<2 x double> %a, <2 x double> %b) {
}
define <2 x double> @buildvector_div_2f64(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @buildvector_div_2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
-; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1
-; CHECK-NEXT: ret <2 x double> [[R1]]
+; SSE-LABEL: @buildvector_div_2f64(
+; SSE-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; SSE-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
+; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; SSE-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1
+; SSE-NEXT: ret <2 x double> [[R1]]
+;
+; SLM-LABEL: @buildvector_div_2f64(
+; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
+; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
+; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
+; SLM-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]]
+; SLM-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]]
+; SLM-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
+; SLM-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1
+; SLM-NEXT: ret <2 x double> [[R1]]
+;
+; AVX-LABEL: @buildvector_div_2f64(
+; AVX-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; AVX-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
+; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; AVX-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1
+; AVX-NEXT: ret <2 x double> [[R1]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
@@ -317,17 +337,48 @@ define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) {
}
define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @buildvector_div_4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
-; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
-; CHECK-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
-; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
-; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
-; CHECK-NEXT: ret <4 x double> [[R3]]
+; SSE-LABEL: @buildvector_div_4f64(
+; SSE-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+; SSE-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
+; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; SSE-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
+; SSE-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+; SSE-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
+; SSE-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+; SSE-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
+; SSE-NEXT: ret <4 x double> [[R3]]
+;
+; SLM-LABEL: @buildvector_div_4f64(
+; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
+; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
+; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
+; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
+; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
+; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
+; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
+; SLM-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]]
+; SLM-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]]
+; SLM-NEXT: [[C2:%.*]] = fdiv double [[A2]], [[B2]]
+; SLM-NEXT: [[C3:%.*]] = fdiv double [[A3]], [[B3]]
+; SLM-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0
+; SLM-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1
+; SLM-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2
+; SLM-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3
+; SLM-NEXT: ret <4 x double> [[R3]]
+;
+; AVX-LABEL: @buildvector_div_4f64(
+; AVX-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+; AVX-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
+; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; AVX-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
+; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+; AVX-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
+; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+; AVX-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
+; AVX-NEXT: ret <4 x double> [[R3]]
;
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
@@ -745,25 +796,80 @@ define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) {
}
define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @buildvector_div_8f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
-; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
-; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
-; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
-; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
-; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
-; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
-; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
-; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
-; CHECK-NEXT: ret <8 x double> [[R7]]
+; SSE-LABEL: @buildvector_div_8f64(
+; SSE-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
+; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
+; SSE-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
+; SSE-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
+; SSE-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
+; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
+; SSE-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
+; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
+; SSE-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
+; SSE-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
+; SSE-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
+; SSE-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
+; SSE-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
+; SSE-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
+; SSE-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
+; SSE-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
+; SSE-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
+; SSE-NEXT: ret <8 x double> [[R7]]
+;
+; SLM-LABEL: @buildvector_div_8f64(
+; SLM-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0
+; SLM-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1
+; SLM-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2
+; SLM-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3
+; SLM-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4
+; SLM-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5
+; SLM-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6
+; SLM-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7
+; SLM-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0
+; SLM-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1
+; SLM-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2
+; SLM-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3
+; SLM-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4
+; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5
+; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6
+; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7
+; SLM-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]]
+; SLM-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]]
+; SLM-NEXT: [[C2:%.*]] = fdiv double [[A2]], [[B2]]
+; SLM-NEXT: [[C3:%.*]] = fdiv double [[A3]], [[B3]]
+; SLM-NEXT: [[C4:%.*]] = fdiv double [[A4]], [[B4]]
+; SLM-NEXT: [[C5:%.*]] = fdiv double [[A5]], [[B5]]
+; SLM-NEXT: [[C6:%.*]] = fdiv double [[A6]], [[B6]]
+; SLM-NEXT: [[C7:%.*]] = fdiv double [[A7]], [[B7]]
+; SLM-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0
+; SLM-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1
+; SLM-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2
+; SLM-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3
+; SLM-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4
+; SLM-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5
+; SLM-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6
+; SLM-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7
+; SLM-NEXT: ret <8 x double> [[R7]]
+;
+; AVX-LABEL: @buildvector_div_8f64(
+; AVX-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
+; AVX-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
+; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
+; AVX-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
+; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
+; AVX-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
+; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
+; AVX-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
+; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
+; AVX-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
+; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
+; AVX-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
+; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
+; AVX-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
+; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
+; AVX-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
+; AVX-NEXT: ret <8 x double> [[R7]]
;
%a0 = extractelement <8 x double> %a, i32 0
%a1 = extractelement <8 x double> %a, i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/test/Transforms/SLPVectorizer/X86/arith-mul.ll
index 95875d7f01fd1..4763a9a2bf12b 100644
--- a/test/Transforms/SLPVectorizer/X86/arith-mul.ll
+++ b/test/Transforms/SLPVectorizer/X86/arith-mul.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
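+; On Silvermont, vector i64 multiplies are costly, so mul_v8i64 is expected
+; to stay scalar under the SLM prefix while the i32/i16 cases still vectorize.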
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
@@ -54,6 +55,41 @@ define void @mul_v8i64() {
; SSE-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
+; SLM-LABEL: @mul_v8i64(
+; SLM-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
+; SLM-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
+; SLM-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
+; SLM-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
+; SLM-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
+; SLM-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
+; SLM-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
+; SLM-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
+; SLM-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
+; SLM-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
+; SLM-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
+; SLM-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
+; SLM-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
+; SLM-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
+; SLM-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
+; SLM-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SLM-NEXT: [[R0:%.*]] = mul i64 [[A0]], [[B0]]
+; SLM-NEXT: [[R1:%.*]] = mul i64 [[A1]], [[B1]]
+; SLM-NEXT: [[R2:%.*]] = mul i64 [[A2]], [[B2]]
+; SLM-NEXT: [[R3:%.*]] = mul i64 [[A3]], [[B3]]
+; SLM-NEXT: [[R4:%.*]] = mul i64 [[A4]], [[B4]]
+; SLM-NEXT: [[R5:%.*]] = mul i64 [[A5]], [[B5]]
+; SLM-NEXT: [[R6:%.*]] = mul i64 [[A6]], [[B6]]
+; SLM-NEXT: [[R7:%.*]] = mul i64 [[A7]], [[B7]]
+; SLM-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
+; SLM-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
+; SLM-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
+; SLM-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
+; SLM-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
+; SLM-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
+; SLM-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
+; SLM-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SLM-NEXT: ret void
+;
; AVX1-LABEL: @mul_v8i64(
; AVX1-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
; AVX1-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
@@ -162,6 +198,25 @@ define void @mul_v16i32() {
; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
+; SLM-LABEL: @mul_v16i32(
+; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @mul_v16i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
@@ -267,6 +322,25 @@ define void @mul_v32i16() {
; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
; SSE-NEXT: ret void
;
+; SLM-LABEL: @mul_v32i16(
+; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP9:%.*]] = mul <8 x i16> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = mul <8 x i16> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = mul <8 x i16> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @mul_v32i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
diff --git a/test/Transforms/SLPVectorizer/X86/arith-sub.ll b/test/Transforms/SLPVectorizer/X86/arith-sub.ll
index 85838369e2266..2bbaaca02d88b 100644
--- a/test/Transforms/SLPVectorizer/X86/arith-sub.ll
+++ b/test/Transforms/SLPVectorizer/X86/arith-sub.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
@@ -38,6 +39,25 @@ define void @sub_v8i64() {
; SSE-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
; SSE-NEXT: ret void
;
+; SLM-LABEL: @sub_v8i64(
+; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = sub <2 x i64> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = sub <2 x i64> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
+; SLM-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @sub_v8i64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -111,6 +131,25 @@ define void @sub_v16i32() {
; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
+; SLM-LABEL: @sub_v16i32(
+; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = sub <4 x i32> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = sub <4 x i32> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @sub_v16i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
@@ -216,6 +255,25 @@ define void @sub_v32i16() {
; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
; SSE-NEXT: ret void
;
+; SLM-LABEL: @sub_v32i16(
+; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: [[TMP9:%.*]] = sub <8 x i16> [[TMP1]], [[TMP5]]
+; SLM-NEXT: [[TMP10:%.*]] = sub <8 x i16> [[TMP2]], [[TMP6]]
+; SLM-NEXT: [[TMP11:%.*]] = sub <8 x i16> [[TMP3]], [[TMP7]]
+; SLM-NEXT: [[TMP12:%.*]] = sub <8 x i16> [[TMP4]], [[TMP8]]
+; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT: ret void
+;
; AVX-LABEL: @sub_v32i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
diff --git a/test/Transforms/SafeStack/X86/debug-loc.ll b/test/Transforms/SafeStack/X86/debug-loc.ll
index 88cda693b2932..d6b217142bfef 100644
--- a/test/Transforms/SafeStack/X86/debug-loc.ll
+++ b/test/Transforms/SafeStack/X86/debug-loc.ll
@@ -37,10 +37,10 @@ entry:
; CHECK-DAG: ![[VAR_ARG]] = !DILocalVariable(name: "zzz"
; 100 aligned up to 8
-; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_minus, 104)
+; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_constu, 104, DW_OP_minus
; CHECK-DAG: ![[VAR_LOCAL]] = !DILocalVariable(name: "xxx"
-; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_minus, 208)
+; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_constu, 208, DW_OP_minus
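+; DW_OP_minus is now a genuine two-operand DWARF subtraction, so the constant
+; must first be pushed onto the expression stack with DW_OP_constu.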
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
diff --git a/test/Transforms/SafeStack/X86/debug-loc2.ll b/test/Transforms/SafeStack/X86/debug-loc2.ll
index 8059a722fd45c..731516c3c65ed 100644
--- a/test/Transforms/SafeStack/X86/debug-loc2.ll
+++ b/test/Transforms/SafeStack/X86/debug-loc2.ll
@@ -84,8 +84,8 @@ attributes #4 = { nounwind }
!13 = !DILocation(line: 5, column: 3, scope: !6)
!14 = !DILocation(line: 6, column: 3, scope: !6)
-; CHECK-DAG: ![[X1_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_minus, 4)
-; CHECK-DAG: ![[X2_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_minus, 8)
+; CHECK-DAG: ![[X1_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_constu, 4, DW_OP_minus)
+; CHECK-DAG: ![[X2_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_constu, 8, DW_OP_minus)
!15 = !DIExpression(DW_OP_deref)
!16 = !DILocation(line: 5, column: 7, scope: !6)
!17 = !DILocation(line: 8, column: 3, scope: !6)
@@ -95,4 +95,4 @@ attributes #4 = { nounwind }
!21 = !DILocation(line: 10, column: 1, scope: !22)
!22 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 1)
!23 = !DIExpression()
-!24 = !DIExpression(DW_OP_minus, 42)
+!24 = !DIExpression(DW_OP_constu, 42, DW_OP_minus)
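Both SafeStack test updates track the same IR-level change: DW_OP_minus in a DIExpression no longer takes an inline literal operand but pops its operand from the expression stack, so the constant must be pushed first with DW_OP_constu. A side-by-side sketch (node numbers illustrative):

!100 = !DIExpression(DW_OP_minus, 104)               ; old form: inline literal operand
!101 = !DIExpression(DW_OP_constu, 104, DW_OP_minus) ; new form: push 104, then subtract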
diff --git a/test/Transforms/Util/PredicateInfo/pr33456.ll b/test/Transforms/Util/PredicateInfo/pr33456.ll
new file mode 100644
index 0000000000000..f1cc83a071b96
--- /dev/null
+++ b/test/Transforms/Util/PredicateInfo/pr33456.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s
+; Don't insert predicate info for conditions with a single target.
+@a = global i32 1, align 4
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@e = common global i32 0, align 4
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @d, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP13:%.*]]
+; CHECK: [[TMP4:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @c, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1
+; CHECK-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP9:%.*]]
+; CHECK: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9]], label [[TMP9]]
+; CHECK: [[DOT0:%.*]] = phi i32 [ [[TMP4]], [[TMP7]] ], [ [[TMP4]], [[TMP7]] ], [ [[DOT1:%.*]], [[TMP13]] ], [ [[TMP4]], [[TMP3]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT: [[TMP11:%.*]] = sdiv i32 [[TMP10]], [[DOT0]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[TMP12]], label [[TMP13]], label [[TMP13]]
+; CHECK: [[DOT1]] = phi i32 [ [[DOT0]], [[TMP9]] ], [ [[DOT0]], [[TMP9]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* @e, align 4
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP9]]
+; CHECK: ret i32 0
+;
+ %1 = load i32, i32* @d, align 4
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %3, label %13
+
+; <label>:3: ; preds = %0
+ %4 = load i32, i32* @a, align 4
+ %5 = load i32, i32* @c, align 4
+ %6 = icmp slt i32 %5, 1
+ br i1 %6, label %7, label %9
+
+; <label>:7: ; preds = %3
+ %8 = icmp eq i32 %4, 0
+ br i1 %8, label %9, label %9
+
+; <label>:9: ; preds = %13, %7, %7, %3
+ %.0 = phi i32 [ %4, %7 ], [ %4, %7 ], [ %.1, %13 ], [ %4, %3 ]
+ %10 = load i32, i32* @b, align 4
+ %11 = sdiv i32 %10, %.0
+ %12 = icmp eq i32 %11, 0
+ br i1 %12, label %13, label %13
+
+; <label>:13: ; preds = %9, %9, %0
+ %.1 = phi i32 [ %.0, %9 ], [ %.0, %9 ], [ undef, %0 ]
+ %14 = load i32, i32* @e, align 4
+ %15 = icmp eq i32 %14, 0
+ br i1 %15, label %16, label %9
+
+; <label>:16: ; preds = %13
+ ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
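pr33456.ll exercises exactly the case named in its header comment: branches such as br i1 %8, label %9, label %9 have only one distinct successor, so PredicateInfo must not insert predicate copies for them. For contrast, a minimal sketch (not part of the test) of a branch with two distinct targets, the shape PredicateInfo is designed to annotate:

define i32 @two_targets(i32 %x) {
  %cmp = icmp eq i32 %x, 0
  br i1 %cmp, label %iszero, label %nonzero

iszero:                                ; along this edge %cmp proves %x == 0
  ret i32 0

nonzero:                               ; along this edge %cmp proves %x != 0
  ret i32 %x
}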
diff --git a/test/Transforms/Util/PredicateInfo/pr33457.ll b/test/Transforms/Util/PredicateInfo/pr33457.ll
new file mode 100644
index 0000000000000..b975ade9321db
--- /dev/null
+++ b/test/Transforms/Util/PredicateInfo/pr33457.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s
+; Don't insert predicate info for conditions with a single target.
+@a = global i32 6, align 4
+@c = global i32 -1, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@d = common global i32 0, align 4
+@b = common global [6 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: store i32 6, i32* @e, align 4
+; CHECK-NEXT: br label [[TMP1:%.*]]
+; CHECK: [[TMP2:%.*]] = load i32, i32* @d, align 4
+; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @b, i64 0, i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label %thread-pre-split, label [[TMP9:%.*]]
+; CHECK: [[TMP10:%.*]] = load i32, i32* @e, align 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP12]]
+; CHECK: thread-pre-split:
+; CHECK-NEXT: [[DOTPR:%.*]] = load i32, i32* @e, align 4
+; CHECK-NEXT: br label [[TMP12]]
+; CHECK: [[TMP13:%.*]] = phi i32 [ [[DOTPR]], %thread-pre-split ], [ [[TMP10]], [[TMP9]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP15]]
+; CHECK: br i1 [[TMP14]], label [[TMP16:%.*]], label [[TMP17:%.*]]
+; CHECK: br label [[TMP17]]
+; CHECK: [[DOT0:%.*]] = phi i32 [ 1, [[TMP16]] ], [ -1, [[TMP15]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[DOT0]], 8693
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* @c, align 4
+; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = xor i32 [[TMP20]], -1
+; CHECK-NEXT: store i32 [[TMP21]], i32* @d, align 4
+; CHECK-NEXT: [[TMP22:%.*]] = icmp slt i32 [[TMP20]], -2
+; CHECK-NEXT: br i1 [[TMP22]], label [[TMP1]], label [[TMP23:%.*]]
+; CHECK: ret i32 0
+;
+ store i32 6, i32* @e, align 4
+ br label %1
+
+; <label>:1: ; preds = %17, %0
+ %2 = load i32, i32* @d, align 4
+ %3 = sext i32 %2 to i64
+ %4 = getelementptr inbounds [6 x i32], [6 x i32]* @b, i64 0, i64 %3
+ %5 = load i32, i32* %4, align 4
+ %6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %5) #2
+ %7 = load i32, i32* @a, align 4
+ %8 = icmp eq i32 %7, 0
+ br i1 %8, label %thread-pre-split, label %9
+
+; <label>:9: ; preds = %1
+ %10 = load i32, i32* @e, align 4
+ %11 = icmp eq i32 %10, 0
+ br i1 %11, label %12, label %12
+
+thread-pre-split: ; preds = %1
+ %.pr = load i32, i32* @e, align 4
+ br label %12
+
+; <label>:12: ; preds = %thread-pre-split, %9, %9
+ %13 = phi i32 [ %.pr, %thread-pre-split ], [ %10, %9 ], [ %10, %9 ]
+ %14 = icmp ne i32 %13, 0
+ br i1 %14, label %15, label %15
+
+; <label>:15: ; preds = %12, %12
+ br i1 %14, label %16, label %17
+
+; <label>:16: ; preds = %15
+ br label %17
+
+; <label>:17: ; preds = %16, %15
+ %.0 = phi i32 [ 1, %16 ], [ -1, %15 ]
+ %18 = and i32 %.0, 8693
+ %19 = load i32, i32* @c, align 4
+ %20 = xor i32 %18, %19
+ %21 = xor i32 %20, -1
+ store i32 %21, i32* @d, align 4
+ %22 = icmp slt i32 %20, -2
+ br i1 %22, label %1, label %23
+
+; <label>:23: ; preds = %17
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+
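pr33457.ll covers the same single-target property through a loop with threaded blocks. Both new files carry autogenerated CHECK lines (per their NOTE headers); if the IR is edited, the assertions are meant to be regenerated with the script rather than maintained by hand, e.g. (opt path illustrative):

  utils/update_test_checks.py --opt-binary=build/bin/opt \
      test/Transforms/Util/PredicateInfo/pr33456.ll \
      test/Transforms/Util/PredicateInfo/pr33457.ll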