summaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-05-29 16:25:25 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-05-29 16:25:25 +0000
commitab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch)
tree568d786a59d49bef961dcb9bd09d422701b9da5b /test/Transforms
parentb5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff)
Notes
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/Coroutines/coro-debug.ll142
-rw-r--r--test/Transforms/Coroutines/coro-frame.ll11
-rw-r--r--test/Transforms/Coroutines/coro-materialize.ll52
-rw-r--r--test/Transforms/EarlyCSE/const-speculation.ll39
-rw-r--r--test/Transforms/GVN/PRE/phi-translate-2.ll105
-rw-r--r--test/Transforms/GVN/PRE/pre-gep-load.ll2
-rw-r--r--test/Transforms/GVN/PRE/pre-load.ll6
-rw-r--r--test/Transforms/GVNSink/dither.ll42
-rw-r--r--test/Transforms/GVNSink/indirect-call.ll70
-rw-r--r--test/Transforms/GVNSink/sink-common-code.ll694
-rw-r--r--test/Transforms/GVNSink/struct.ll71
-rw-r--r--test/Transforms/GlobalDCE/externally_available.ll19
-rw-r--r--test/Transforms/Inline/prof-update-instr.ll57
-rw-r--r--test/Transforms/Inline/prof-update-sample.ll (renamed from test/Transforms/Inline/prof-update.ll)0
-rw-r--r--test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll19
-rw-r--r--test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll87
-rw-r--r--test/Transforms/InstCombine/alloca.ll7
-rw-r--r--test/Transforms/InstCombine/bitcast-vec-canon.ll37
-rw-r--r--test/Transforms/InstCombine/bitcast.ll45
-rw-r--r--test/Transforms/InstCombine/ctpop.ll27
-rw-r--r--test/Transforms/InstCombine/icmp-xor-signbit.ll228
-rw-r--r--test/Transforms/InstCombine/icmp.ll64
-rw-r--r--test/Transforms/InstSimplify/call.ll253
-rw-r--r--test/Transforms/InstSimplify/or.ll41
-rw-r--r--test/Transforms/JumpThreading/assume.ll145
-rw-r--r--test/Transforms/JumpThreading/fold-not-thread.ll4
-rw-r--r--test/Transforms/JumpThreading/guards.ll87
-rw-r--r--test/Transforms/LoopIdiom/pr33114.ll35
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll12
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll (renamed from test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll)26
-rw-r--r--test/Transforms/LoopStrengthReduce/nonintegral.ll45
-rw-r--r--test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll4
-rw-r--r--test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll26
-rw-r--r--test/Transforms/LoopVectorize/SystemZ/addressing.ll72
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll124
-rw-r--r--test/Transforms/NewGVN/pr32403.ll3
-rw-r--r--test/Transforms/NewGVN/pr32836.ll45
-rw-r--r--test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll61
38 files changed, 2542 insertions, 265 deletions
diff --git a/test/Transforms/Coroutines/coro-debug.ll b/test/Transforms/Coroutines/coro-debug.ll
new file mode 100644
index 000000000000..4da545499f94
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-debug.ll
@@ -0,0 +1,142 @@
+; Tests that debug information is sane after coro-split
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+source_filename = "simple-repro.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind
+define i8* @f(i32 %x) #0 !dbg !6 {
+entry:
+ %x.addr = alloca i32, align 4
+ %coro_hdl = alloca i8*, align 8
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ call void @llvm.dbg.declare(metadata i8** %coro_hdl, metadata !15, metadata !13), !dbg !16
+ %0 = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* (i32)* @f to i8*), i8* null), !dbg !16
+ %1 = call i64 @llvm.coro.size.i64(), !dbg !16
+ %call = call i8* @malloc(i64 %1), !dbg !16
+ %2 = call i8* @llvm.coro.begin(token %0, i8* %call) #7, !dbg !16
+ store i8* %2, i8** %coro_hdl, align 8, !dbg !16
+ %3 = call i8 @llvm.coro.suspend(token none, i1 false), !dbg !17
+ %conv = sext i8 %3 to i32, !dbg !17
+ call void @coro.devirt.trigger(i8* null)
+ switch i32 %conv, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ ], !dbg !17
+
+sw.bb: ; preds = %entry
+ br label %sw.epilog, !dbg !18
+
+sw.bb1: ; preds = %entry
+ br label %coro_Cleanup, !dbg !18
+
+sw.default: ; preds = %entry
+ br label %coro_Suspend, !dbg !18
+
+sw.epilog: ; preds = %sw.bb
+ %4 = load i32, i32* %x.addr, align 4, !dbg !20
+ %add = add nsw i32 %4, 1, !dbg !21
+ store i32 %add, i32* %x.addr, align 4, !dbg !22
+ br label %coro_Cleanup, !dbg !23
+
+coro_Cleanup: ; preds = %sw.epilog, %sw.bb1
+ %5 = load i8*, i8** %coro_hdl, align 8, !dbg !24
+ %6 = call i8* @llvm.coro.free(token %0, i8* %5), !dbg !24
+ call void @free(i8* %6), !dbg !24
+ br label %coro_Suspend, !dbg !24
+
+coro_Suspend: ; preds = %coro_Cleanup, %sw.default
+ %7 = call i1 @llvm.coro.end(i8* null, i1 false) #7, !dbg !24
+ %8 = load i8*, i8** %coro_hdl, align 8, !dbg !24
+ ret i8* %8, !dbg !24
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #2
+
+declare i8* @malloc(i64) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.coro.size.i64() #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.coro.begin(token, i8* writeonly) #5
+
+; Function Attrs: nounwind
+declare i8 @llvm.coro.suspend(token, i1) #5
+
+declare void @free(i8*) #3
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.end(i8*, i1) #5
+
+; Function Attrs: alwaysinline
+define private void @coro.devirt.trigger(i8*) #6 {
+entry:
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #2
+
+attributes #0 = { noinline nounwind "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { argmemonly nounwind readonly }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind readnone }
+attributes #5 = { nounwind }
+attributes #6 = { alwaysinline }
+attributes #7 = { noduplicate }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 97b002238b11ff30d94d0516d6a0515db5725fd8) (http://llvm.org/git/llvm.git 0cb060ba567f1aa5b4b04e86665f88e4632b528a)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git 97b002238b11ff30d94d0516d6a0515db5725fd8) (http://llvm.org/git/llvm.git 0cb060ba567f1aa5b4b04e86665f88e4632b528a)"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "flink", scope: !7, file: !7, line: 55, type: !8, isLocal: false, isDefinition: true, scopeLine: 55, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!7 = !DIFile(filename: "simple-repro.c", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin")
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !7, line: 55, type: !11)
+!13 = !DIExpression()
+!14 = !DILocation(line: 55, column: 13, scope: !6)
+!15 = !DILocalVariable(name: "coro_hdl", scope: !6, file: !7, line: 56, type: !10)
+!16 = !DILocation(line: 56, column: 3, scope: !6)
+!17 = !DILocation(line: 58, column: 5, scope: !6)
+!18 = !DILocation(line: 58, column: 5, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !6, file: !7, line: 58, column: 5)
+!20 = !DILocation(line: 59, column: 9, scope: !6)
+!21 = !DILocation(line: 59, column: 10, scope: !6)
+!22 = !DILocation(line: 59, column: 7, scope: !6)
+!23 = !DILocation(line: 59, column: 5, scope: !6)
+!24 = !DILocation(line: 62, column: 3, scope: !6)
+
+; CHECK: define i8* @f(i32 %x) #0 !dbg ![[ORIG:[0-9]+]]
+; CHECK: define internal fastcc void @f.resume(%f.Frame* %FramePtr) #0 !dbg ![[RESUME:[0-9]+]]
+; CHECK: define internal fastcc void @f.destroy(%f.Frame* %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]]
+; CHECK: define internal fastcc void @f.cleanup(%f.Frame* %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]]
+
+; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[ORIG]]
+
+; CHECK: ![[RESUME]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[RESUME]]
+
+; CHECK: ![[DESTROY]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+
+; CHECK: ![[CLEANUP]] = distinct !DISubprogram(name: "f", linkageName: "flink"
diff --git a/test/Transforms/Coroutines/coro-frame.ll b/test/Transforms/Coroutines/coro-frame.ll
index 001012fcd0c9..826d3a04fa1e 100644
--- a/test/Transforms/Coroutines/coro-frame.ll
+++ b/test/Transforms/Coroutines/coro-frame.ll
@@ -1,8 +1,11 @@
; Check that we can handle spills of the result of the invoke instruction
; RUN: opt < %s -coro-split -S | FileCheck %s
-define i8* @f() "coroutine.presplit"="1" personality i32 0 {
+define i8* @f(i64 %this) "coroutine.presplit"="1" personality i32 0 {
entry:
+ %this.addr = alloca i64
+ store i64 %this, i64* %this.addr
+ %this1 = load i64, i64* %this.addr
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
@@ -15,6 +18,7 @@ cont:
i8 1, label %cleanup]
resume:
call double @print(double %r)
+ call void @print2(i64 %this1)
br label %cleanup
cleanup:
@@ -30,12 +34,12 @@ pad:
}
; See if the float was added to the frame
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, double }
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, double }
; See if the float was spilled into the frame
; CHECK-LABEL: @f(
; CHECK: %r = call double @print(
-; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
; CHECK: store double %r, double* %r.spill.addr
; CHECK: ret i8* %hdl
@@ -58,4 +62,5 @@ declare i1 @llvm.coro.end(i8*, i1)
declare noalias i8* @malloc(i32)
declare double @print(double)
+declare void @print2(i64)
declare void @free(i8*)
diff --git a/test/Transforms/Coroutines/coro-materialize.ll b/test/Transforms/Coroutines/coro-materialize.ll
new file mode 100644
index 000000000000..95e8a049ad2f
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-materialize.ll
@@ -0,0 +1,52 @@
+; Verifies that we materialize instruction across suspend points
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i32 %n) "coroutine.presplit"="1" {
+entry:
+ %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call i8* @malloc(i32 %size)
+ %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+ %inc1 = add i32 %n, 1
+ %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %sp1, label %suspend [i8 0, label %resume1
+ i8 1, label %cleanup]
+resume1:
+ %inc2 = add i32 %inc1, 1
+ %sp2 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %sp1, label %suspend [i8 0, label %resume2
+ i8 1, label %cleanup]
+
+resume2:
+ call void @print(i32 %inc1)
+ call void @print(i32 %inc2)
+ br label %cleanup
+
+cleanup:
+ %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+ call void @free(i8* %mem)
+ br label %suspend
+suspend:
+ call i1 @llvm.coro.end(i8* %hdl, i1 0)
+ ret i8* %hdl
+}
+
+; See that we only spilled one value
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 }
+; CHECK-LABEL: @f(
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)
diff --git a/test/Transforms/EarlyCSE/const-speculation.ll b/test/Transforms/EarlyCSE/const-speculation.ll
new file mode 100644
index 000000000000..5b7f2f5b6982
--- /dev/null
+++ b/test/Transforms/EarlyCSE/const-speculation.ll
@@ -0,0 +1,39 @@
+; RUN: opt -early-cse -S %s | FileCheck %s
+
+%mystruct = type { i32 }
+
+; @var is global so that *every* GEP argument is Constant.
+@var = external global %mystruct
+
+; Control flow is to make the dominance tree consider the final icmp before it
+; gets to simplify the purely constant one (%tst). Since that icmp uses the
+; select that gets considered next. Finally the select simplification looks at
+; the %tst icmp and we don't want it to speculate about what happens if "i32 0"
+; is actually "i32 1", broken universes are automatic UB.
+;
+; In this case doing the speculation would create an invalid GEP(@var, 0, 1) and
+; crash.
+
+define i1 @test_constant_speculation() {
+; CHECK-LABEL: define i1 @test_constant_speculation
+entry:
+ br i1 undef, label %end, label %select
+
+select:
+; CHECK: select:
+; CHECK-NOT: icmp
+; CHECK-NOT: getelementptr
+; CHECK-NOT: select
+
+ %tst = icmp eq i32 1, 0
+ %elt = getelementptr %mystruct, %mystruct* @var, i64 0, i32 0
+ %sel = select i1 %tst, i32* null, i32* %elt
+ br label %end
+
+end:
+; CHECK: end:
+; CHECK: %tmp = phi i32* [ null, %entry ], [ getelementptr inbounds (%mystruct, %mystruct* @var, i64 0, i32 0), %select ]
+ %tmp = phi i32* [null, %entry], [%sel, %select]
+ %res = icmp eq i32* %tmp, null
+ ret i1 %res
+}
diff --git a/test/Transforms/GVN/PRE/phi-translate-2.ll b/test/Transforms/GVN/PRE/phi-translate-2.ll
new file mode 100644
index 000000000000..b2993657c7f5
--- /dev/null
+++ b/test/Transforms/GVN/PRE/phi-translate-2.ll
@@ -0,0 +1,105 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global [100 x i64] zeroinitializer, align 16
+@b = common global [100 x i64] zeroinitializer, align 16
+@g1 = common global i64 0, align 8
+@g2 = common global i64 0, align 8
+@g3 = common global i64 0, align 8
+declare i64 @goo(...) local_unnamed_addr #1
+
+define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
+entry:
+ %mul = mul nsw i64 %b, %a
+ store i64 %mul, i64* @g1, align 8
+ %t0 = load i64, i64* @g2, align 8
+ %cmp = icmp sgt i64 %t0, 3
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %mul2 = mul nsw i64 %d, %c
+ store i64 %mul2, i64* @g2, align 8
+ br label %if.end
+
+; Check phi-translate works and mul is removed.
+; CHECK-LABEL: @test1(
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end: ; preds = %if.then, %entry
+ %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ]
+ %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ]
+ %mul3 = mul nsw i64 %a.addr.0, %b.addr.0
+ store i64 %mul3, i64* @g3, align 8
+ ret void
+}
+
+define void @test2(i64 %i) {
+entry:
+ %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i
+ %t0 = load i64, i64* %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i
+ %t1 = load i64, i64* %arrayidx1, align 8
+ %mul = mul nsw i64 %t1, %t0
+ store i64 %mul, i64* @g1, align 8
+ %cmp = icmp sgt i64 %mul, 3
+ br i1 %cmp, label %if.then, label %if.end
+
+; Check phi-translate works for the phi generated by loadpre. A new mul will be
+; inserted in if.then block.
+; CHECK-LABEL: @test2(
+; CHECK: if.then:
+; CHECK: %[[MUL_THEN:.*]] = mul
+; CHECK: br label %if.end
+if.then: ; preds = %entry
+ %call = tail call i64 (...) @goo() #2
+ store i64 %call, i64* @g2, align 8
+ br label %if.end
+
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end: ; preds = %if.then, %entry
+ %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ]
+ %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0
+ %t2 = load i64, i64* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0
+ %t3 = load i64, i64* %arrayidx4, align 8
+ %mul5 = mul nsw i64 %t3, %t2
+ store i64 %mul5, i64* @g3, align 8
+ ret void
+}
+
+; Check phi-translate doesn't go through backedge, which may lead to incorrect
+; pre transformation.
+; CHECK: for.end:
+; CHECK-NOT: %{{.*pre-phi}} = phi
+; CHECK: ret void
+define void @test3(i64 %N, i64* nocapture readonly %a) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %add = add nuw nsw i64 %i.0, 1
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add
+ %tmp0 = load i64, i64* %arrayidx, align 8
+ %cmp = icmp slt i64 %i.0, %N
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %call = tail call i64 (...) @goo() #2
+ %add1 = sub nsw i64 0, %call
+ %tobool = icmp eq i64 %tmp0, %add1
+ br i1 %tobool, label %for.cond, label %for.end
+
+for.end: ; preds = %for.body, %for.cond
+ %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ]
+ %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa
+ %tmp1 = load i64, i64* %arrayidx2, align 8
+ store i64 %tmp1, i64* @g1, align 8
+ ret void
+}
+
diff --git a/test/Transforms/GVN/PRE/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll
index 9eec8bb6455b..1b2b4d20d31d 100644
--- a/test/Transforms/GVN/PRE/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@@ -37,7 +37,7 @@ sw.bb2: ; preds = %if.end, %entry
%3 = load double, double* %arrayidx5, align 8
; CHECK: sw.bb2:
; CHECK-NOT: sext
-; CHECK-NEXT: phi double [
+; CHECK: phi double [
; CHECK-NOT: load
%sub6 = fsub double 3.000000e+00, %3
br label %return
diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll
index 685df24f62b6..ffff2b7f08e5 100644
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@@ -72,7 +72,7 @@ block4:
%PRE = load i32, i32* %P3
ret i32 %PRE
; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}
@@ -104,7 +104,7 @@ block4:
%PRE = load i32, i32* %P3
ret i32 %PRE
; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}
@@ -263,7 +263,7 @@ block4:
%PRE = load i32, i32* %P3
ret i32 %PRE
; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}
diff --git a/test/Transforms/GVNSink/dither.ll b/test/Transforms/GVNSink/dither.ll
new file mode 100644
index 000000000000..9717021aca82
--- /dev/null
+++ b/test/Transforms/GVNSink/dither.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -S -gvn-sink | FileCheck %s
+
+; Because %tmp17 has flipped operands to its equivalents %tmp14 and %tmp7, we
+; can't sink the zext as we'd need a shuffling PHI in between.
+;
+; Just sinking the zext isn't profitable, so ensure nothing is sunk.
+
+; CHECK-LABEL: @hoge
+; CHECK-NOT: bb18.gvnsink.split
+define void @hoge() {
+bb:
+ br i1 undef, label %bb4, label %bb11
+
+bb4: ; preds = %bb3
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb5
+ %tmp = zext i16 undef to i64
+ %tmp7 = add i64 %tmp, undef
+ br label %bb18
+
+bb8: ; preds = %bb5
+ %tmp9 = zext i16 undef to i64
+ br label %bb18
+
+bb11: ; preds = %bb10
+ br i1 undef, label %bb12, label %bb15
+
+bb12: ; preds = %bb11
+ %tmp13 = zext i16 undef to i64
+ %tmp14 = add i64 %tmp13, undef
+ br label %bb18
+
+bb15: ; preds = %bb11
+ %tmp16 = zext i16 undef to i64
+ %tmp17 = add i64 undef, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb15, %bb12, %bb8, %bb6
+ %tmp19 = phi i64 [ %tmp7, %bb6 ], [ undef, %bb8 ], [ %tmp14, %bb12 ], [ %tmp17, %bb15 ]
+ unreachable
+}
diff --git a/test/Transforms/GVNSink/indirect-call.ll b/test/Transforms/GVNSink/indirect-call.ll
new file mode 100644
index 000000000000..da98ed0819a6
--- /dev/null
+++ b/test/Transforms/GVNSink/indirect-call.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+declare i8 @ext(i1)
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) {
+entry:
+ %cmp = icmp uge i32 %blksA, %nblks
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: call i8 @ext
+; CHECK: call i8 %ext
+if.then:
+ %frombool1 = call i8 @ext(i1 %cmp)
+ br label %if.end
+
+if.else:
+ %frombool3 = call i8 %ext(i1 %cmp)
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) {
+entry:
+ %cmp = icmp uge i32 %blksA, %nblks
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: call i8 %ext
+; CHECK-NOT: call
+if.then:
+ %frombool1 = call i8 %ext(i1 %cmp)
+ br label %if.end
+
+if.else:
+ %frombool3 = call i8 %ext(i1 %cmp)
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test3(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext1, i8(i1)* %ext2) {
+entry:
+ %cmp = icmp uge i32 %blksA, %nblks
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test3
+; CHECK: %[[x:.*]] = select i1 %flag, i8 (i1)* %ext1, i8 (i1)* %ext2
+; CHECK: call i8 %[[x]](i1 %cmp)
+; CHECK-NOT: call
+if.then:
+ %frombool1 = call i8 %ext1(i1 %cmp)
+ br label %if.end
+
+if.else:
+ %frombool3 = call i8 %ext2(i1 %cmp)
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
diff --git a/test/Transforms/GVNSink/sink-common-code.ll b/test/Transforms/GVNSink/sink-common-code.ll
new file mode 100644
index 000000000000..d9e757cd10fc
--- /dev/null
+++ b/test/Transforms/GVNSink/sink-common-code.ll
@@ -0,0 +1,694 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp uge i32 %blksA, %add
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+declare i32 @foo(i32, i32) nounwind readnone
+
+define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+ %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+ br label %if.end
+
+if.else:
+ %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+ %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+ br label %if.end
+
+if.end:
+ %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+ %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+ %ret = add i32 %xx, %yy
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test3
+; CHECK: select
+; CHECK: call
+; CHECK: call
+; CHECK: add
+; CHECK-NOT: br
+
+define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test4
+; CHECK: select
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test5
+; CHECK: store volatile
+; CHECK: store
+
+define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test6
+; CHECK: select
+; CHECK: store volatile
+; CHECK-NOT: store
+
+define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %z = load volatile i32, i32* %y
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test7
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The extra store in %if.then means %z and %w are not equivalent.
+define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ store i32 7, i32* %p
+ %z = load volatile i32, i32* %y
+ store i32 6, i32* %p
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test9
+; CHECK: add
+; CHECK: add
+
+%struct.anon = type { i32, i32 }
+
+; The GEP indexes a struct type so cannot have a variable last index.
+define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 5
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+ store volatile i32 %x, i32* %gepa
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %x, 6
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ store volatile i32 %x, i32* %gepb
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test10
+; CHECK: getelementptr
+; CHECK: store volatile
+; CHECK: getelementptr
+; CHECK: store volatile
+
+; The shufflevector's mask operand cannot be merged in a PHI.
+define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %w, 5
+ %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 0, i32 1>
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %w, 6
+ %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 1, i32 0>
+ br label %if.end
+
+if.end:
+ %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test11
+; CHECK: shufflevector
+; CHECK: shufflevector
+
+; We can't common an intrinsic!
+define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %w, 5
+ %sv1 = call i32 @llvm.ctlz.i32(i32 %x)
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %w, 6
+ %sv2 = call i32 @llvm.cttz.i32(i32 %x)
+ br label %if.end
+
+if.end:
+ %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+
+declare i32 @llvm.ctlz.i32(i32 %x) readnone
+declare i32 @llvm.cttz.i32(i32 %x) readnone
+
+; CHECK-LABEL: test12
+; CHECK: call i32 @llvm.ctlz
+; CHECK: call i32 @llvm.cttz
+
+; The TBAA metadata should be properly combined.
+define i32 @test13(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %z = load volatile i32, i32* %y
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y, !tbaa !3
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y, !tbaa !4
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+!0 = !{ !"an example type tree" }
+!1 = !{ !"int", !0 }
+!2 = !{ !"float", !0 }
+!3 = !{ !"const float", !2, i64 0 }
+!4 = !{ !"special float", !2, i64 1 }
+
+; CHECK-LABEL: test13
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile {{.*}}, !tbaa !0
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The call should be commoned.
+define i32 @test13a(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %sv1 = call i32 @bar(i32 %x)
+ br label %if.end
+
+if.else:
+ %sv2 = call i32 @bar(i32 %y)
+ br label %if.end
+
+if.end:
+ %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+declare i32 @bar(i32)
+
+; CHECK-LABEL: test13a
+; CHECK: %[[x:.*]] = select i1 %flag
+; CHECK: call i32 @bar(i32 %[[x]])
+
+; The load should be commoned.
+define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 1
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ %sv1 = load i32, i32* %gepa
+ %cmp1 = icmp eq i32 %sv1, 56
+ br label %if.end
+
+if.else:
+ %dummy2 = add i32 %x, 4
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ %sv2 = load i32, i32* %gepb
+ %cmp2 = icmp eq i32 %sv2, 57
+ br label %if.end
+
+if.end:
+ %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test14
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+; The load should be commoned.
+define i32 @test15(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 1
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+ %sv1 = load i32, i32* %gepa
+ %ext1 = zext i32 %sv1 to i64
+ %cmp1 = icmp eq i64 %ext1, 56
+ br label %if.end
+
+if.else:
+ %dummy2 = add i32 %x, 4
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ %sv2 = load i32, i32* %gepb
+ %ext2 = zext i32 %sv2 to i64
+ %cmp2 = icmp eq i64 %ext2, 56
+ br label %if.end
+
+if.end:
+ %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test15
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+define zeroext i1 @test_crash(i1 zeroext %flag, i32* %i4, i32* %m, i32* %n) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %tmp1 = load i32, i32* %i4
+ %tmp2 = add i32 %tmp1, -1
+ store i32 %tmp2, i32* %i4
+ br label %if.end
+
+if.else:
+ %tmp3 = load i32, i32* %m
+ %tmp4 = load i32, i32* %n
+ %tmp5 = add i32 %tmp3, %tmp4
+ store i32 %tmp5, i32* %i4
+ br label %if.end
+
+if.end:
+ ret i1 true
+}
+
+; CHECK-LABEL: test_crash
+; No checks for test_crash - just ensure it doesn't crash!
+
+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK: zext
+
+define zeroext i1 @test16a(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks, i8* %p) {
+
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ %b1 = sext i8 %frombool1 to i32
+ %b2 = trunc i32 %b1 to i8
+ store i8 %b2, i8* %p
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ %a1 = sext i8 %frombool3 to i32
+ %a2 = trunc i32 %a1 to i8
+ store i8 %a2, i8* %p
+ br label %if.end
+
+if.end:
+ ret i1 true
+}
+
+; CHECK-LABEL: test16a
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ switch i32 %flag, label %if.end [
+ i32 0, label %if.then
+ i32 1, label %if.then2
+ ]
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = call i8 @i1toi8(i1 %cmp)
+ %a1 = sext i8 %frombool1 to i32
+ %a2 = trunc i32 %a1 to i8
+ br label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = call i8 @i1toi8(i1 %cmp2)
+ %b1 = sext i8 %frombool3 to i32
+ %b2 = trunc i32 %b1 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, %if.then2 ], [ 0, %entry ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+declare i8 @i1toi8(i1)
+
+; FIXME: DISABLED - we don't consider this profitable. We should
+; - Consider argument setup/return mov'ing for calls, like InlineCost does.
+; - Consider the removal of the %obeys.0 PHI (zero PHI movement overall)
+
+; DISABLED-CHECK-LABEL: test17
+; DISABLED-CHECK: if.then:
+; DISABLED-CHECK-NEXT: icmp uge
+; DISABLED-CHECK-NEXT: br label %[[x:.*]]
+
+; DISABLED-CHECK: if.then2:
+; DISABLED-CHECK-NEXT: add
+; DISABLED-CHECK-NEXT: icmp ule
+; DISABLED-CHECK-NEXT: br label %[[x]]
+
+; DISABLED-CHECK: [[x]]:
+; DISABLED-CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; DISABLED-CHECK-NEXT: %[[z:.*]] = call i8 @i1toi8(i1 %[[y]])
+; DISABLED-CHECK-NEXT: br label %if.end
+
+; DISABLED-CHECK: if.end:
+; DISABLED-CHECK-NEXT: phi i8
+; DISABLED-CHECK-DAG: [ %[[z]], %[[x]] ]
+; DISABLED-CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ switch i32 %flag, label %if.then3 [
+ i32 0, label %if.then
+ i32 1, label %if.then2
+ ]
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.then3:
+ %add2 = add i32 %nblks, %blksA
+ %cmp3 = icmp ule i32 %add2, %blksA
+ %frombool4 = zext i1 %cmp3 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
+; The phi is confusing - both add instructions are used by it, but
+; not on their respective unconditional arcs. It should not be
+; optimized.
+define void @test_pr30292(i1 %cond, i1 %cond2, i32 %a, i32 %b) {
+entry:
+ %add1 = add i32 %a, 1
+ br label %succ
+
+one:
+ br i1 %cond, label %two, label %succ
+
+two:
+ call void @g()
+ %add2 = add i32 %a, 1
+ br label %succ
+
+succ:
+ %p = phi i32 [ 0, %entry ], [ %add1, %one ], [ %add2, %two ]
+ br label %one
+}
+declare void @g()
+
+; CHECK-LABEL: test_pr30292
+; CHECK: phi i32 [ 0, %entry ], [ %add1, %succ ], [ %add2, %two ]
+
+define zeroext i1 @test_pr30244(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+ %p = alloca i8
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ store i8 %frombool1, i8* %p
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ store i8 %frombool3, i8* %p
+ br label %if.end
+
+if.end:
+ ret i1 true
+}
+
+; CHECK-LABEL: @test_pr30244
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test_pr30373a(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+ %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+ %z0 = lshr i32 %y0, 8
+ br label %if.end
+
+if.else:
+ %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+ %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+ %z1 = lshr exact i32 %y1, 8
+ br label %if.end
+
+if.end:
+ %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+ %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+ %ret = add i32 %xx, %yy
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373a
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+define i32 @test_pr30373b(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+ %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+ %z0 = lshr exact i32 %y0, 8
+ br label %if.end
+
+if.else:
+ %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+ %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+ %z1 = lshr i32 %y1, 8
+ br label %if.end
+
+if.end:
+ %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+ %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+ %ret = add i32 %xx, %yy
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373b
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
+; CHECK: !2 = !{!"an example type tree"}
diff --git a/test/Transforms/GVNSink/struct.ll b/test/Transforms/GVNSink/struct.ll
new file mode 100644
index 000000000000..2228cf2803ae
--- /dev/null
+++ b/test/Transforms/GVNSink/struct.ll
@@ -0,0 +1,71 @@
+; RUN: opt -gvn-sink -S < %s | FileCheck %s
+
+%struct = type {i32, i32}
+%struct2 = type { [ 2 x i32], i32 }
+
+; Struct indices cannot be variant.
+
+; CHECK-LABEL: @f() {
+; CHECK: getelementptr
+; CHECK: getelementptr
+define void @f() {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %tmp = getelementptr inbounds %struct, %struct* null, i64 0, i32 1
+ br label %bb4
+
+bb2: ; preds = %bb
+ %tmp3 = getelementptr inbounds %struct, %struct* null, i64 0, i32 0
+ br label %bb4
+
+bb4: ; preds = %bb2, %bb1
+ %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ]
+ ret void
+}
+
+; Struct indices cannot be variant.
+
+; CHECK-LABEL: @g() {
+; CHECK: getelementptr
+; CHECK: getelementptr
+define void @g() {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %tmp = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 1
+ br label %bb4
+
+bb2: ; preds = %bb
+ %tmp3 = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 0
+ br label %bb4
+
+bb4: ; preds = %bb2, %bb1
+ %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ]
+ ret void
+}
+
+
+; ... but the first parameter to a GEP can.
+
+; CHECK-LABEL: @h() {
+; CHECK: getelementptr
+; CHECK-NOT: getelementptr
+define void @h() {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %tmp = getelementptr inbounds %struct, %struct* null, i32 0, i32 0
+ br label %bb4
+
+bb2: ; preds = %bb
+ %tmp3 = getelementptr inbounds %struct, %struct* null, i32 1, i32 0
+ br label %bb4
+
+bb4: ; preds = %bb2, %bb1
+ %tmp5 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ]
+ ret void
+} \ No newline at end of file
diff --git a/test/Transforms/GlobalDCE/externally_available.ll b/test/Transforms/GlobalDCE/externally_available.ll
index fca49b29ec8e..bc54db38cee0 100644
--- a/test/Transforms/GlobalDCE/externally_available.ll
+++ b/test/Transforms/GlobalDCE/externally_available.ll
@@ -1,12 +1,21 @@
; RUN: opt < %s -globaldce -S | FileCheck %s
+; test_global should not be emitted to the .s file.
+; CHECK-NOT: @test_global =
+@test_global = available_externally global i32 4
+
+; test_global2 is a normal global using an available externally function.
+; CHECK: @test_global2 =
+@test_global2 = global i32 ()* @test_function2
+
; test_function should not be emitted to the .s file.
-; CHECK-NOT: test_function
+; CHECK-NOT: define {{.*}} @test_function()
define available_externally i32 @test_function() {
ret i32 4
}
-; test_global should not be emitted to the .s file.
-; CHECK-NOT: test_global
-@test_global = available_externally global i32 4
-
+; test_function2 isn't actually dead even though it's available externally.
+; CHECK: define available_externally i32 @test_function2()
+define available_externally i32 @test_function2() {
+ ret i32 4
+}
diff --git a/test/Transforms/Inline/prof-update-instr.ll b/test/Transforms/Inline/prof-update-instr.ll
new file mode 100644
index 000000000000..6650165cb904
--- /dev/null
+++ b/test/Transforms/Inline/prof-update-instr.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+; Checks if inliner updates VP metadata for indrect call instructions
+; with instrumentation based profile.
+
+@func = global void ()* null
+@func2 = global void ()* null
+
+; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
+define void @callee(i32 %n) !prof !15 {
+ %cond = icmp sle i32 %n, 10
+ br i1 %cond, label %cond_true, label %cond_false, !prof !20
+cond_true:
+; f2 is optimized away, thus not updated.
+ %f2 = load void ()*, void ()** @func2
+; CHECK: call void %f2(), !prof ![[COUNT_IND_CALLEE1:[0-9]*]]
+ call void %f2(), !prof !19
+ ret void
+cond_false:
+ %f = load void ()*, void ()** @func
+; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]
+ call void %f(), !prof !18
+ ret void
+}
+
+; CHECK: define void @caller()
+define void @caller() !prof !21 {
+; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]
+ call void @callee(i32 15)
+ ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"branch_weights", i64 2000}
+!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
+!19 = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40}
+!20 = !{!"branch_weights", i32 1000, i32 1000}
+!21 = !{!"function_entry_count", i64 400}
+attributes #0 = { alwaysinline }
+; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
+; CHECK: ![[COUNT_IND_CALLEE1]] = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40}
+; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
+; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8}
diff --git a/test/Transforms/Inline/prof-update.ll b/test/Transforms/Inline/prof-update-sample.ll
index 4a4471e8e17a..4a4471e8e17a 100644
--- a/test/Transforms/Inline/prof-update.ll
+++ b/test/Transforms/Inline/prof-update-sample.ll
diff --git a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll b/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
deleted file mode 100644
index 76e30399a666..000000000000
--- a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep add
-; RUN: opt < %s -instcombine -S | not grep mul
-; PR2330
-
-define i1 @f(i32 %x, i32 %y) nounwind {
-entry:
- %A = add i32 %x, 5
- %B = add i32 %y, 5
- %C = icmp eq i32 %A, %B
- ret i1 %C
-}
-
-define i1 @g(i32 %x, i32 %y) nounwind {
-entry:
- %A = mul i32 %x, 5
- %B = mul i32 %y, 5
- %C = icmp eq i32 %A, %B
- ret i1 %C
-}
diff --git a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll b/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
deleted file mode 100644
index b91457c79dea..000000000000
--- a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
+++ /dev/null
@@ -1,87 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define i1 @test1(i8 %x, i8 %y) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[TMP:%.*]] = icmp ult i8 %x, %y
-; CHECK-NEXT: ret i1 [[TMP]]
-;
- %X = xor i8 %x, 128
- %Y = xor i8 %y, 128
- %tmp = icmp slt i8 %X, %Y
- ret i1 %tmp
-}
-
-define i1 @test2(i8 %x, i8 %y) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: [[TMP:%.*]] = icmp slt i8 %x, %y
-; CHECK-NEXT: ret i1 [[TMP]]
-;
- %X = xor i8 %x, 128
- %Y = xor i8 %y, 128
- %tmp = icmp ult i8 %X, %Y
- ret i1 %tmp
-}
-
-define i1 @test3(i8 %x) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i8 %x, -114
-; CHECK-NEXT: ret i1 [[TMP]]
-;
- %X = xor i8 %x, 128
- %tmp = icmp uge i8 %X, 15
- ret i1 %tmp
-}
-
-define <2 x i1> @test3vec(<2 x i8> %x) {
-; CHECK-LABEL: @test3vec(
-; CHECK-NEXT: [[TMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114>
-; CHECK-NEXT: ret <2 x i1> [[TMP]]
-;
- %X = xor <2 x i8> %x, <i8 128, i8 128>
- %tmp = icmp uge <2 x i8> %X, <i8 15, i8 15>
- ret <2 x i1> %tmp
-}
-
-define i1 @test4(i8 %x, i8 %y) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: [[TMP:%.*]] = icmp ugt i8 %x, %y
-; CHECK-NEXT: ret i1 [[TMP]]
-;
- %X = xor i8 %x, 127
- %Y = xor i8 %y, 127
- %tmp = icmp slt i8 %X, %Y
- ret i1 %tmp
-}
-
-define i1 @test5(i8 %x, i8 %y) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i8 %x, %y
-; CHECK-NEXT: ret i1 [[TMP]]
-;
- %X = xor i8 %x, 127
- %Y = xor i8 %y, 127
- %tmp = icmp ult i8 %X, %Y
- ret i1 %tmp
-}
-
-define i1 @test6(i8 %x) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: [[TMP:%.*]] = icmp slt i8 %x, 113
-; CHECK-NEXT: ret i1 [[TMP]]
-;
- %X = xor i8 %x, 127
- %tmp = icmp uge i8 %X, 15
- ret i1 %tmp
-}
-
-define <2 x i1> @test6vec(<2 x i8> %x) {
-; CHECK-LABEL: @test6vec(
-; CHECK-NEXT: [[TMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113>
-; CHECK-NEXT: ret <2 x i1> [[TMP]]
-;
- %X = xor <2 x i8> %x, <i8 127, i8 127>
- %tmp = icmp uge <2 x i8> %X, <i8 15, i8 15>
- ret <2 x i1> %tmp
-}
-
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index f81f700e6cf4..490830af2d82 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -51,8 +51,8 @@ define i32* @test4(i32 %n) {
ret i32* %A
}
-; Allocas which are only used by GEPs, bitcasts, and stores (transitively)
-; should be deleted.
+; Allocas which are only used by GEPs, bitcasts, addrspacecasts, and stores
+; (transitively) should be deleted.
define void @test5() {
; CHECK-LABEL: @test5(
; CHECK-NOT: alloca
@@ -62,6 +62,7 @@ define void @test5() {
entry:
%a = alloca { i32 }
%b = alloca i32*
+ %c = alloca i32
%a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
store i32 123, i32* %a.1
store i32* %a.1, i32** %b
@@ -73,6 +74,8 @@ entry:
store atomic i32 3, i32* %a.3 release, align 4
%a.4 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
store atomic i32 4, i32* %a.4 seq_cst, align 4
+ %c.1 = addrspacecast i32* %c to i32 addrspace(1)*
+ store i32 123, i32 addrspace(1)* %c.1
ret void
}
diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll
index 97145221099e..a92a7b73fd7e 100644
--- a/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -1,41 +1,40 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
define double @a(<1 x i64> %y) {
+; CHECK-LABEL: @a(
+; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> %y to <1 x double>
+; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0
+; CHECK-NEXT: ret double [[C]]
+;
%c = bitcast <1 x i64> %y to double
ret double %c
-
-; CHECK-LABEL: @a(
-; CHECK-NEXT: bitcast <1 x i64> %y to <1 x double>
-; CHECK-NEXT: extractelement <1 x double> {{.*}}, i32 0
-; CHECK-NEXT: ret double
}
define i64 @b(<1 x i64> %y) {
+; CHECK-LABEL: @b(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
%c = bitcast <1 x i64> %y to i64
ret i64 %c
-
-; CHECK-LABEL: @b(
-; CHECK-NEXT: extractelement <1 x i64> %y, i32 0
-; CHECK-NEXT: ret i64
}
define <1 x i64> @c(double %y) {
+; CHECK-LABEL: @c(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double %y to i64
+; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+; CHECK-NEXT: ret <1 x i64> [[C]]
+;
%c = bitcast double %y to <1 x i64>
ret <1 x i64> %c
-
-; CHECK-LABEL: @c(
-; CHECK-NEXT: bitcast double %y to i64
-; CHECK-NEXT: insertelement <1 x i64> undef, i64 {{.*}}, i32 0
-; CHECK-NEXT: ret <1 x i64>
}
define <1 x i64> @d(i64 %y) {
+; CHECK-LABEL: @d(
+; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 %y, i32 0
+; CHECK-NEXT: ret <1 x i64> [[C]]
+;
%c = bitcast i64 %y to <1 x i64>
ret <1 x i64> %c
-
-; CHECK-LABEL: @d(
-; CHECK-NEXT: insertelement <1 x i64> undef, i64 %y, i32 0
-; CHECK-NEXT: ret <1 x i64>
}
-
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 2e7f30fee14d..4cf3f27ab014 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -70,6 +70,51 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
ret <2 x i32> %t2
}
+; PR26702 - https://bugs.llvm.org//show_bug.cgi?id=26702
+; Bitcast is canonicalized below logic, so we can see the not-not pattern.
+
+define <2 x i64> @is_negative(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative(
+; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[LOBIT]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[NOTNOT]]
+;
+ %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+ %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc = bitcast <4 x i32> %not to <2 x i64>
+ %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+ ret <2 x i64> %notnot
+}
+
+; This variation has an extra bitcast at the end. This means that the 2nd xor
+; can be done in <4 x i32> to eliminate a bitcast regardless of canonicalizaion.
+
+define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative_bonus_bitcast(
+; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[LOBIT]]
+;
+ %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+ %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc = bitcast <4 x i32> %not to <2 x i64>
+ %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+ %bc2 = bitcast <2 x i64> %notnot to <4 x i32>
+ ret <4 x i32> %bc2
+}
+
+; Negative test: bitcasts are canonicalized below bitwise logic. No changes here.
+
+define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
+; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
+; CHECK-NEXT: [[A:%.*]] = and <4 x i4> %x, <i4 0, i4 -8, i4 0, i4 -8>
+; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i4> [[A]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[B]]
+;
+ %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
+ %b = bitcast <4 x i4> %a to <2 x i8>
+ ret <2 x i8> %b
+}
+
; PR27925 - https://llvm.org/bugs/show_bug.cgi?id=27925
define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll
index 38612c92aaa4..6bc6f9731979 100644
--- a/test/Transforms/InstCombine/ctpop.ll
+++ b/test/Transforms/InstCombine/ctpop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -instcombine | FileCheck %s
declare i32 @llvm.ctpop.i32(i32)
@@ -5,8 +6,9 @@ declare i8 @llvm.ctpop.i8(i8)
declare void @llvm.assume(i1)
define i1 @test1(i32 %arg) {
-; CHECK: @test1
-; CHECK: ret i1 false
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 false
+;
%and = and i32 %arg, 15
%cnt = call i32 @llvm.ctpop.i32(i32 %and)
%res = icmp eq i32 %cnt, 9
@@ -14,8 +16,9 @@ define i1 @test1(i32 %arg) {
}
define i1 @test2(i32 %arg) {
-; CHECK: @test2
-; CHECK: ret i1 false
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 false
+;
%and = and i32 %arg, 1
%cnt = call i32 @llvm.ctpop.i32(i32 %and)
%res = icmp eq i32 %cnt, 2
@@ -23,9 +26,12 @@ define i1 @test2(i32 %arg) {
}
define i1 @test3(i32 %arg) {
-; CHECK: @test3
-; CHECK: ret i1 false
- ;; Use an assume to make all the bits known without triggering constant
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[ASSUME:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]])
+; CHECK-NEXT: ret i1 false
+;
+ ;; Use an assume to make all the bits known without triggering constant
;; folding. This is trying to hit a corner case where we have to avoid
;; taking the log of 0.
%assume = icmp eq i32 %arg, 0
@@ -37,8 +43,11 @@ define i1 @test3(i32 %arg) {
; Negative test for when we know nothing
define i1 @test4(i8 %arg) {
-; CHECK: @test4
-; CHECK: ret i1 %res
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
+; CHECK-NEXT: ret i1 [[RES]]
+;
%cnt = call i8 @llvm.ctpop.i8(i8 %arg)
%res = icmp eq i8 %cnt, 2
ret i1 %res
diff --git a/test/Transforms/InstCombine/icmp-xor-signbit.ll b/test/Transforms/InstCombine/icmp-xor-signbit.ll
new file mode 100644
index 000000000000..30a9668f37df
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-xor-signbit.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
+
+define i1 @slt_to_ult(i8 %x, i8 %y) {
+; CHECK-LABEL: @slt_to_ult(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %b = xor i8 %y, 128
+ %cmp = icmp slt i8 %a, %b
+ ret i1 %cmp
+}
+
+; PR33138 - https://bugs.llvm.org/show_bug.cgi?id=33138
+
+define <2 x i1> @slt_to_ult_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @slt_to_ult_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %b = xor <2 x i8> %y, <i8 128, i8 128>
+ %cmp = icmp slt <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @ult_to_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @ult_to_slt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %b = xor i8 %y, 128
+ %cmp = icmp ult i8 %a, %b
+ ret i1 %cmp
+}
+
+define <2 x i1> @ult_to_slt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @ult_to_slt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %b = xor <2 x i8> %y, <i8 128, i8 128>
+ %cmp = icmp ult <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
+
+define i1 @slt_to_ugt(i8 %x, i8 %y) {
+; CHECK-LABEL: @slt_to_ugt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %b = xor i8 %y, 127
+ %cmp = icmp slt i8 %a, %b
+ ret i1 %cmp
+}
+
+define <2 x i1> @slt_to_ugt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @slt_to_ugt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %b = xor <2 x i8> %y, <i8 127, i8 127>
+ %cmp = icmp slt <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @ult_to_sgt(i8 %x, i8 %y) {
+; CHECK-LABEL: @ult_to_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %b = xor i8 %y, 127
+ %cmp = icmp ult i8 %a, %b
+ ret i1 %cmp
+}
+
+define <2 x i1> @ult_to_sgt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @ult_to_sgt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %b = xor <2 x i8> %y, <i8 127, i8 127>
+ %cmp = icmp ult <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ signmask), C --> icmp s/u a, C'
+
+define i1 @sge_to_ugt(i8 %x) {
+; CHECK-LABEL: @sge_to_ugt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %x, -114
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %cmp = icmp sge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sge_to_ugt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, <i8 -114, i8 -114>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @uge_to_sgt(i8 %x) {
+; CHECK-LABEL: @uge_to_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -114
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %cmp = icmp uge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @uge_to_sgt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ maxsignval), C --> icmp s/u' a, C'
+
+define i1 @sge_to_ult(i8 %x) {
+; CHECK-LABEL: @sge_to_ult(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %x, 113
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %cmp = icmp sge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sge_to_ult_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, <i8 113, i8 113>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @uge_to_slt(i8 %x) {
+; CHECK-LABEL: @uge_to_slt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 113
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %cmp = icmp uge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @uge_to_slt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; PR33138, part 2: https://bugs.llvm.org/show_bug.cgi?id=33138
+; TODO: We could look through vector bitcasts for icmp folds,
+; or we could canonicalize bitcast ahead of logic ops with constants.
+
+define <8 x i1> @sgt_to_ugt_bitcasted_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @sgt_to_ugt_bitcasted_splat(
+; CHECK-NEXT: [[A:%.*]] = xor <2 x i32> %x, <i32 -2139062144, i32 -2139062144>
+; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> %y, <i32 -2139062144, i32 -2139062144>
+; CHECK-NEXT: [[C:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+; CHECK-NEXT: [[D:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+; CHECK-NEXT: [[E:%.*]] = icmp sgt <8 x i8> [[C]], [[D]]
+; CHECK-NEXT: ret <8 x i1> [[E]]
+;
+ %a = xor <2 x i32> %x, <i32 2155905152, i32 2155905152> ; 0x80808080
+ %b = xor <2 x i32> %y, <i32 2155905152, i32 2155905152>
+ %c = bitcast <2 x i32> %a to <8 x i8>
+ %d = bitcast <2 x i32> %b to <8 x i8>
+ %e = icmp sgt <8 x i8> %c, %d
+ ret <8 x i1> %e
+}
+
+; TODO: This is false (little-endian). How should that be recognized?
+; Ie, should InstSimplify know this directly, should InstCombine canonicalize
+; this so InstSimplify can know this, or is that not something that we want
+; either pass to recognize?
+
+define <2 x i1> @negative_simplify_splat(<4 x i8> %x) {
+; CHECK-LABEL: @negative_simplify_splat(
+; CHECK-NEXT: [[A:%.*]] = or <4 x i8> %x, <i8 0, i8 -128, i8 0, i8 -128>
+; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i8> [[A]] to <2 x i16>
+; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[B]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %a = or <4 x i8> %x, <i8 0, i8 128, i8 0, i8 128>
+ %b = bitcast <4 x i8> %a to <2 x i16>
+ %c = icmp sgt <2 x i16> %b, zeroinitializer
+ ret <2 x i1> %c
+}
+
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 6f657b190454..ed570da73c9e 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -2895,3 +2895,67 @@ define i1 @cmp_ult_rhs_dec(float %x, i32 %y) {
%cmp = icmp ult i32 %conv, %dec
ret i1 %cmp
}
+
+define i1 @eq_add_constants(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_add_constants(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = add i32 %x, 5
+ %B = add i32 %y, 5
+ %C = icmp eq i32 %A, %B
+ ret i1 %C
+}
+
+define i1 @eq_mul_constants(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_mul_constants(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = mul i32 %x, 5
+ %B = mul i32 %y, 5
+ %C = icmp eq i32 %A, %B
+ ret i1 %C
+}
+
+define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @eq_mul_constants_splat(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = mul <2 x i32> %x, <i32 5, i32 5>
+ %B = mul <2 x i32> %y, <i32 5, i32 5>
+ %C = icmp ne <2 x i32> %A, %B
+ ret <2 x i1> %C
+}
+
+; If the multiply constant has any trailing zero bits, we get something completely different.
+; We mask off the high bits of each input and then convert:
+; (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+
+define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_mul_constants_with_tz(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %x, %y
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = mul i32 %x, 12
+ %B = mul i32 %y, 12
+ %C = icmp ne i32 %A, %B
+ ret i1 %C
+}
+
+define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @eq_mul_constants_with_tz_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> %x, %y
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = mul <2 x i32> %x, <i32 12, i32 12>
+ %B = mul <2 x i32> %y, <i32 12, i32 12>
+ %C = icmp eq <2 x i32> %A, %B
+ ret <2 x i1> %C
+}
+
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 988ec2b71c50..68daac65ee6b 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -1,64 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a, i8 %b)
declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
declare {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
define i1 @test_uadd1() {
; CHECK-LABEL: @test_uadd1(
+; CHECK-NEXT: ret i1 true
+;
%x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3)
%overflow = extractvalue {i8, i1} %x, 1
ret i1 %overflow
-; CHECK-NEXT: ret i1 true
}
define i8 @test_uadd2() {
; CHECK-LABEL: @test_uadd2(
+; CHECK-NEXT: ret i8 42
+;
%x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44)
%result = extractvalue {i8, i1} %x, 0
ret i8 %result
-; CHECK-NEXT: ret i8 42
+}
+
+define {i8, i1} @test_uadd3(i8 %v) {
+; CHECK-LABEL: @test_uadd3(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %result = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %v, i8 undef)
+ ret {i8, i1} %result
+}
+
+define {i8, i1} @test_uadd4(i8 %v) {
+; CHECK-LABEL: @test_uadd4(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %result = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 %v)
+ ret {i8, i1} %result
+}
+
+define i1 @test_sadd1() {
+; CHECK-LABEL: @test_sadd1(
+; CHECK-NEXT: ret i1 true
+;
+ %x = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 126, i8 3)
+ %overflow = extractvalue {i8, i1} %x, 1
+ ret i1 %overflow
+}
+
+define i8 @test_sadd2() {
+; CHECK-LABEL: @test_sadd2(
+; CHECK-NEXT: ret i8 -86
+;
+ %x = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 126, i8 44)
+ %result = extractvalue {i8, i1} %x, 0
+ ret i8 %result
+}
+
+define {i8, i1} @test_sadd3(i8 %v) {
+; CHECK-LABEL: @test_sadd3(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %result = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v, i8 undef)
+ ret {i8, i1} %result
+}
+
+define {i8, i1} @test_sadd4(i8 %v) {
+; CHECK-LABEL: @test_sadd4(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %result = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 %v)
+ ret {i8, i1} %result
}
define {i8, i1} @test_usub1(i8 %V) {
; CHECK-LABEL: @test_usub1(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
%x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %V, i8 %V)
ret {i8, i1} %x
-; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+}
+
+define {i8, i1} @test_usub2(i8 %V) {
+; CHECK-LABEL: @test_usub2(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %V, i8 undef)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_usub3(i8 %V) {
+; CHECK-LABEL: @test_usub3(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 %V)
+ ret {i8, i1} %x
}
define {i8, i1} @test_ssub1(i8 %V) {
; CHECK-LABEL: @test_ssub1(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
%x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %V, i8 %V)
ret {i8, i1} %x
-; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+}
+
+define {i8, i1} @test_ssub2(i8 %V) {
+; CHECK-LABEL: @test_ssub2(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %V, i8 undef)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_ssub3(i8 %V) {
+; CHECK-LABEL: @test_ssub3(
+; CHECK-NEXT: ret { i8, i1 } undef
+;
+ %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 %V)
+ ret {i8, i1} %x
}
define {i8, i1} @test_umul1(i8 %V) {
; CHECK-LABEL: @test_umul1(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
%x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %V, i8 0)
ret {i8, i1} %x
-; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+}
+
+define {i8, i1} @test_umul2(i8 %V) {
+; CHECK-LABEL: @test_umul2(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %V, i8 undef)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_umul3(i8 %V) {
+; CHECK-LABEL: @test_umul3(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 0, i8 %V)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_umul4(i8 %V) {
+; CHECK-LABEL: @test_umul4(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 %V)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul1(i8 %V) {
+; CHECK-LABEL: @test_smul1(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %V, i8 0)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul2(i8 %V) {
+; CHECK-LABEL: @test_smul2(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %V, i8 undef)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul3(i8 %V) {
+; CHECK-LABEL: @test_smul3(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 0, i8 %V)
+ ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul4(i8 %V) {
+; CHECK-LABEL: @test_smul4(
+; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+;
+ %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 %V)
+ ret {i8, i1} %x
}
declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef)
define i256 @test_cttz() {
; CHECK-LABEL: @test_cttz(
+; CHECK-NEXT: ret i256 1
+;
%x = call i256 @llvm.cttz.i256(i256 10, i1 false)
ret i256 %x
-; CHECK-NEXT: ret i256 1
}
declare i256 @llvm.ctpop.i256(i256 %src)
define i256 @test_ctpop() {
; CHECK-LABEL: @test_ctpop(
+; CHECK-NEXT: ret i256 2
+;
%x = call i256 @llvm.ctpop.i256(i256 10)
ret i256 %x
-; CHECK-NEXT: ret i256 2
}
; Test a non-intrinsic that we know about as a library call.
@@ -66,14 +214,15 @@ declare float @fabs(float %x)
define float @test_fabs_libcall() {
; CHECK-LABEL: @test_fabs_libcall(
+; CHECK-NEXT: [[X:%.*]] = call float @fabs(float -4.200000e+01)
+; CHECK-NEXT: ret float 4.200000e+01
+;
%x = call float @fabs(float -42.0)
; This is still a real function call, so instsimplify won't nuke it -- other
; passes have to do that.
-; CHECK-NEXT: call float @fabs
ret float %x
-; CHECK-NEXT: ret float 4.2{{0+}}e+01
}
@@ -87,34 +236,35 @@ declare float @llvm.nearbyint.f32(float) nounwind readnone
; Test idempotent intrinsics
define float @test_idempotence(float %a) {
; CHECK-LABEL: @test_idempotence(
+; CHECK-NEXT: [[A0:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
+; CHECK-NEXT: [[B0:%.*]] = call float @llvm.floor.f32(float [[A]])
+; CHECK-NEXT: [[C0:%.*]] = call float @llvm.ceil.f32(float [[A]])
+; CHECK-NEXT: [[D0:%.*]] = call float @llvm.trunc.f32(float [[A]])
+; CHECK-NEXT: [[E0:%.*]] = call float @llvm.rint.f32(float [[A]])
+; CHECK-NEXT: [[F0:%.*]] = call float @llvm.nearbyint.f32(float [[A]])
+; CHECK-NEXT: [[R0:%.*]] = fadd float [[A0]], [[B0]]
+; CHECK-NEXT: [[R1:%.*]] = fadd float [[R0]], [[C0]]
+; CHECK-NEXT: [[R2:%.*]] = fadd float [[R1]], [[D0]]
+; CHECK-NEXT: [[R3:%.*]] = fadd float [[R2]], [[E0]]
+; CHECK-NEXT: [[R4:%.*]] = fadd float [[R3]], [[F0]]
+; CHECK-NEXT: ret float [[R4]]
+;
-; CHECK: fabs
-; CHECK-NOT: fabs
%a0 = call float @llvm.fabs.f32(float %a)
%a1 = call float @llvm.fabs.f32(float %a0)
-; CHECK: floor
-; CHECK-NOT: floor
%b0 = call float @llvm.floor.f32(float %a)
%b1 = call float @llvm.floor.f32(float %b0)
-; CHECK: ceil
-; CHECK-NOT: ceil
%c0 = call float @llvm.ceil.f32(float %a)
%c1 = call float @llvm.ceil.f32(float %c0)
-; CHECK: trunc
-; CHECK-NOT: trunc
%d0 = call float @llvm.trunc.f32(float %a)
%d1 = call float @llvm.trunc.f32(float %d0)
-; CHECK: rint
-; CHECK-NOT: rint
%e0 = call float @llvm.rint.f32(float %a)
%e1 = call float @llvm.rint.f32(float %e0)
-; CHECK: nearbyint
-; CHECK-NOT: nearbyint
%f0 = call float @llvm.nearbyint.f32(float %a)
%f1 = call float @llvm.nearbyint.f32(float %f0)
@@ -128,6 +278,17 @@ define float @test_idempotence(float %a) {
}
define i8* @operator_new() {
+; CHECK-LABEL: @operator_new(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @_Znwm(i64 8)
+; CHECK-NEXT: br i1 false, label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]]
+; CHECK: cast.notnull:
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4
+; CHECK-NEXT: br label [[CAST_END]]
+; CHECK: cast.end:
+; CHECK-NEXT: [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i8* [[CAST_RESULT]]
+;
entry:
%call = tail call noalias i8* @_Znwm(i64 8)
%cmp = icmp eq i8* %call, null
@@ -141,8 +302,6 @@ cast.end: ; preds = %cast.notnull, %entr
%cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
ret i8* %cast.result
-; CHECK-LABEL: @operator_new
-; CHECK: br i1 false, label %cast.end, label %cast.notnull
}
declare nonnull noalias i8* @_Znwm(i64)
@@ -151,6 +310,18 @@ declare nonnull noalias i8* @_Znwm(i64)
@_ZSt7nothrow = external global %"struct.std::nothrow_t"
define i8* @operator_new_nothrow_t() {
+; CHECK-LABEL: @operator_new_nothrow_t(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]]
+; CHECK: cast.notnull:
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4
+; CHECK-NEXT: br label [[CAST_END]]
+; CHECK: cast.end:
+; CHECK-NEXT: [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i8* [[CAST_RESULT]]
+;
entry:
%call = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow)
%cmp = icmp eq i8* %call, null
@@ -164,13 +335,23 @@ cast.end: ; preds = %cast.notnull, %entr
%cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
ret i8* %cast.result
-; CHECK-LABEL: @operator_new_nothrow_t
-; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
}
declare i8* @_ZnamRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) nounwind
define i8* @malloc_can_return_null() {
+; CHECK-LABEL: @malloc_can_return_null(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 8)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]]
+; CHECK: cast.notnull:
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4
+; CHECK-NEXT: br label [[CAST_END]]
+; CHECK: cast.end:
+; CHECK-NEXT: [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i8* [[CAST_RESULT]]
+;
entry:
%call = tail call noalias i8* @malloc(i64 8)
%cmp = icmp eq i8* %call, null
@@ -184,38 +365,44 @@ cast.end: ; preds = %cast.notnull, %entr
%cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
ret i8* %cast.result
-; CHECK-LABEL: @malloc_can_return_null
-; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
}
define i32 @call_null() {
+; CHECK-LABEL: @call_null(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 null()
+; CHECK-NEXT: ret i32 undef
+;
entry:
%call = call i32 null()
ret i32 %call
}
-; CHECK-LABEL: define i32 @call_null(
-; CHECK: ret i32 undef
define i32 @call_undef() {
+; CHECK-LABEL: @call_undef(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 undef()
+; CHECK-NEXT: ret i32 undef
+;
entry:
%call = call i32 undef()
ret i32 %call
}
-; CHECK-LABEL: define i32 @call_undef(
-; CHECK: ret i32 undef
@GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49]
define <8 x i32> @partial_masked_load() {
; CHECK-LABEL: @partial_masked_load(
-; CHECK: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+;
%masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -2) to <8 x i32>*), i32 4, <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
ret <8 x i32> %masked.load
}
define <8 x i32> @masked_load_undef_mask(<8 x i32>* %V) {
; CHECK-LABEL: @masked_load_undef_mask(
-; CHECK: ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+;
%masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %V, i32 4, <8 x i1> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>)
ret <8 x i32> %masked.load
}
diff --git a/test/Transforms/InstSimplify/or.ll b/test/Transforms/InstSimplify/or.ll
index 2c5b6181bc6c..14b08af00646 100644
--- a/test/Transforms/InstSimplify/or.ll
+++ b/test/Transforms/InstSimplify/or.ll
@@ -159,7 +159,7 @@ define i399 @test4_apint(i399 %V, i399 %M) {
%A = add i399 %V, %N
%B = and i399 %A, %C1
%D = and i399 %V, 274877906943
- %R = or i399 %B, %D
+ %R = or i399 %D, %B
ret i399 %R
}
@@ -179,3 +179,42 @@ define i117 @test6_apint(i117 %X) {
ret i117 %Y
}
+; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
+; Vector version of test1_apint with the add commuted
+define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) {
+; CHECK-LABEL: @test7_apint(
+; CHECK-NEXT: [[N:%.*]] = and <2 x i39> [[M:%.*]], <i39 -274877906944, i39 -274877906944>
+; CHECK-NEXT: [[A:%.*]] = add <2 x i39> [[N]], [[V:%.*]]
+; CHECK-NEXT: ret <2 x i39> [[A]]
+;
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor <2 x i39> <i39 274877906943, i39 274877906943>, <i39 -1, i39 -1> ;; C2 = 274877906943
+ %N = and <2 x i39> %M, <i39 274877906944, i39 274877906944>
+ %A = add <2 x i39> %N, %V
+ %B = and <2 x i39> %A, %C1
+ %D = and <2 x i39> %V, <i39 274877906943, i39 274877906943>
+ %R = or <2 x i39> %B, %D
+ ret <2 x i39> %R
+}
+
+; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
+; Vector version of test4_apint with the add and the or commuted
+define <2 x i399> @test8_apint(<2 x i399> %V, <2 x i399> %M) {
+; CHECK-LABEL: @test8_apint(
+; CHECK-NEXT: [[N:%.*]] = and <2 x i399> [[M:%.*]], <i399 18446742974197923840, i399 18446742974197923840>
+; CHECK-NEXT: [[A:%.*]] = add <2 x i399> [[N]], [[V:%.*]]
+; CHECK-NEXT: ret <2 x i399> [[A]]
+;
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor <2 x i399> <i399 274877906943, i399 274877906943>, <i399 -1, i399 -1> ;; C2 = 274877906943
+ %N = and <2 x i399> %M, <i399 18446742974197923840, i399 18446742974197923840>
+ %A = add <2 x i399> %N, %V
+ %B = and <2 x i399> %A, %C1
+ %D = and <2 x i399> %V, <i399 274877906943, i399 274877906943>
+ %R = or <2 x i399> %D, %B
+ ret <2 x i399> %R
+}
diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll
index 3a039676e172..f58ee299cba0 100644
--- a/test/Transforms/JumpThreading/assume.ll
+++ b/test/Transforms/JumpThreading/assume.ll
@@ -59,12 +59,12 @@ return: ; preds = %entry, %if.then
@g = external global i32
; Check that we do prove a fact using an assume within the block.
-; FIXME: We can fold the assume based on the semantics of assume.
-; CHECK-LABEL: @can_fold_assume
-; CHECK: %notnull = icmp ne i32* %array, null
-; CHECK-NEXT: call void @llvm.assume(i1 %notnull)
-; CHECK-NEXT: ret void
+; We can fold the assume based on the semantics of assume.
define void @can_fold_assume(i32* %array) {
+; CHECK-LABEL: @can_fold_assume
+; CHECK-NOT: call void @llvm.assume
+; CHECK-NOT: br
+; CHECK: ret void
%notnull = icmp ne i32* %array, null
call void @llvm.assume(i1 %notnull)
br i1 %notnull, label %normal, label %error
@@ -80,19 +80,128 @@ error:
declare void @f(i1)
declare void @exit()
; We can fold the assume but not the uses before the assume.
-define void @dont_fold_incorrectly(i32* %array) {
-; CHECK-LABEL:@dont_fold_incorrectly
+define void @cannot_fold_use_before_assume(i32* %array) {
+; CHECK-LABEL:@cannot_fold_use_before_assume
; CHECK: @f(i1 %notnull)
; CHECK-NEXT: exit()
-; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NOT: assume
+; CHECK-NEXT: ret void
+ %notnull = icmp ne i32* %array, null
+ call void @f(i1 %notnull)
+ call void @exit()
+ call void @llvm.assume(i1 %notnull)
+ br i1 %notnull, label %normal, label %error
+
+normal:
+ ret void
+
+error:
+ store atomic i32 0, i32* @g unordered, align 4
+ ret void
+}
+
+declare void @dummy(i1) nounwind argmemonly
+define void @can_fold_some_use_before_assume(i32* %array) {
+
+; CHECK-LABEL:@can_fold_some_use_before_assume
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: @dummy(i1 true)
+; CHECK-NOT: assume
; CHECK-NEXT: ret void
%notnull = icmp ne i32* %array, null
call void @f(i1 %notnull)
+ call void @dummy(i1 %notnull)
+ call void @llvm.assume(i1 %notnull)
+ br i1 %notnull, label %normal, label %error
+
+normal:
+ ret void
+
+error:
+ store atomic i32 0, i32* @g unordered, align 4
+ ret void
+
+}
+
+; FIXME: can fold assume and all uses before/after assume.
+; because the trapping exit call is after the assume.
+define void @can_fold_assume_and_all_uses(i32* %array) {
+; CHECK-LABEL:@can_fold_assume_and_all_uses
+; CHECK: @dummy(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: exit()
+; CHECK-NEXT: %notnull2 = or i1 true, false
+; CHECK-NEXT: @f(i1 %notnull2)
+; CHECK-NEXT: ret void
+ %notnull = icmp ne i32* %array, null
+ call void @dummy(i1 %notnull)
+ call void @llvm.assume(i1 %notnull)
call void @exit()
+ br i1 %notnull, label %normal, label %error
+
+normal:
+ %notnull2 = or i1 %notnull, false
+ call void @f(i1 %notnull2)
+ ret void
+
+error:
+ store atomic i32 0, i32* @g unordered, align 4
+ ret void
+}
+
+declare void @fz(i8)
+; FIXME: We can fold assume to true, and the use after assume, but we do not do so
+; currently, because of the function call after the assume.
+define void @can_fold_assume2(i32* %array) {
+
+; CHECK-LABEL:@can_fold_assume2
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 %notnull to i8
+; CHECK-NEXT: @f(i1 %notnull)
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+ %notnull = icmp ne i32* %array, null
+ call void @f(i1 %notnull)
+ call void @llvm.assume(i1 %notnull)
+ %znotnull = zext i1 %notnull to i8
+ call void @f(i1 %notnull)
+ br i1 %notnull, label %normal, label %error
+
+normal:
+ call void @f(i1 %notnull)
+ call void @fz(i8 %znotnull)
+ ret void
+
+error:
+ store atomic i32 0, i32* @g unordered, align 4
+ ret void
+}
+
+declare void @llvm.experimental.guard(i1, ...)
+; FIXME: We can fold assume to true, but we do not do so
+; because of the guard following the assume.
+define void @can_fold_assume3(i32* %array){
+
+; CHECK-LABEL:@can_fold_assume3
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: guard(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 true to i8
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+ %notnull = icmp ne i32* %array, null
+ call void @f(i1 %notnull)
call void @llvm.assume(i1 %notnull)
+ call void(i1, ...) @llvm.experimental.guard(i1 %notnull) [ "deopt"() ]
+ %znotnull = zext i1 %notnull to i8
br i1 %notnull, label %normal, label %error
normal:
+ call void @f(i1 %notnull)
+ call void @fz(i8 %znotnull)
ret void
error:
@@ -100,6 +209,26 @@ error:
ret void
}
+
+; can fold all uses and remove the cond
+define void @can_fold_assume4(i32* %array) {
+; CHECK-LABEL: can_fold_assume4
+; CHECK-NOT: notnull
+; CHECK: dummy(i1 true)
+; CHECK-NEXT: ret void
+ %notnull = icmp ne i32* %array, null
+ call void @exit()
+ call void @dummy(i1 %notnull)
+ call void @llvm.assume(i1 %notnull)
+ br i1 %notnull, label %normal, label %error
+
+normal:
+ ret void
+
+error:
+ store atomic i32 0, i32* @g unordered, align 4
+ ret void
+}
; Function Attrs: nounwind
declare void @llvm.assume(i1) #1
diff --git a/test/Transforms/JumpThreading/fold-not-thread.ll b/test/Transforms/JumpThreading/fold-not-thread.ll
index f05169b31bc8..85cdcc0d9b33 100644
--- a/test/Transforms/JumpThreading/fold-not-thread.ll
+++ b/test/Transforms/JumpThreading/fold-not-thread.ll
@@ -133,10 +133,10 @@ L3:
ret void
}
-; FIXME: Make sure we can do the RAUW for %add...
+; Make sure we can do the RAUW for %add...
;
; CHECK-LABEL: @rauw_if_possible(
-; CHECK: call void @f4(i32 %add)
+; CHECK: call void @f4(i32 96)
define void @rauw_if_possible(i32 %value) nounwind {
entry:
%cmp = icmp eq i32 %value, 32
diff --git a/test/Transforms/JumpThreading/guards.ll b/test/Transforms/JumpThreading/guards.ll
index c5f72b113efc..53175a7b7253 100644
--- a/test/Transforms/JumpThreading/guards.ll
+++ b/test/Transforms/JumpThreading/guards.ll
@@ -182,86 +182,89 @@ Exit:
ret void
}
-declare void @never_called()
+declare void @never_called(i1)
-; Assume the guard is always taken and we deoptimize, so we never reach the
-; branch below that guard. We should *never* change the behaviour of a guard from
-; `must deoptimize` to `may deoptimize`, since this affects the program
-; semantics.
+; LVI uses guard to identify value of %c2 in branch as true, we cannot replace that
+; guard with guard(true & c1).
define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) {
; CHECK-LABEL: dont_fold_guard
-; CHECK: experimental.guard(i1 %wide.chk)
-
-entry:
- br label %BBPred
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
+ %c1 = icmp ult i32 %i, %length
+ %c2 = icmp eq i32 %i, 0
+ %wide.chk = and i1 %c1, %c2
+ call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+ br i1 %c2, label %BB1, label %BB2
-BBPred:
- %cond = icmp eq i8* %addr, null
- br i1 %cond, label %zero, label %not_zero
+BB1:
+ call void @never_called(i1 %c2)
+ ret void
-zero:
- unreachable
+BB2:
+ ret void
+}
-not_zero:
+declare void @dummy(i1) nounwind argmemonly
+; same as dont_fold_guard1 but there's a use immediately after guard and before
+; branch. We can fold that use.
+define void @dont_fold_guard2(i8* %addr, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard2
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
%c1 = icmp ult i32 %i, %length
%c2 = icmp eq i32 %i, 0
%wide.chk = and i1 %c1, %c2
call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
- br i1 %c2, label %unreachedBB2, label %unreachedBB1
+ call void @dummy(i1 %c2)
+ br i1 %c2, label %BB1, label %BB2
-unreachedBB2:
- call void @never_called()
+BB1:
+ call void @never_called(i1 %c2)
ret void
-unreachedBB1:
+BB2:
ret void
}
-
; same as dont_fold_guard1 but condition %cmp is not an instruction.
; We cannot fold the guard under any circumstance.
; FIXME: We can merge unreachableBB2 into not_zero.
-define void @dont_fold_guard2(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard2
+define void @dont_fold_guard3(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard3
; CHECK: guard(i1 %cmp)
-
-entry:
- br label %BBPred
-
-BBPred:
- %cond = icmp eq i8* %addr, null
- br i1 %cond, label %zero, label %not_zero
-
-zero:
- unreachable
-
-not_zero:
call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
- br i1 %cmp, label %unreachedBB2, label %unreachedBB1
+ br i1 %cmp, label %BB1, label %BB2
-unreachedBB2:
- call void @never_called()
+BB1:
+ call void @never_called(i1 %cmp)
ret void
-unreachedBB1:
+BB2:
ret void
}
+declare void @f(i1)
; Same as dont_fold_guard1 but use switch instead of branch.
; triggers source code `ProcessThreadableEdges`.
-declare void @f(i1)
-define void @dont_fold_guard3(i1 %cmp1, i32 %i) nounwind {
-; CHECK-LABEL: dont_fold_guard3
+define void @dont_fold_guard4(i1 %cmp1, i32 %i) nounwind {
+; CHECK-LABEL: dont_fold_guard4
; CHECK-LABEL: L2:
; CHECK-NEXT: %cmp = icmp eq i32 %i, 0
; CHECK-NEXT: guard(i1 %cmp)
-; CHECK-NEXT: @f(i1 %cmp)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: @f(i1 true)
; CHECK-NEXT: ret void
entry:
br i1 %cmp1, label %L0, label %L3
L0:
%cmp = icmp eq i32 %i, 0
call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+ call void @dummy(i1 %cmp)
switch i1 %cmp, label %L3 [
i1 false, label %L1
i1 true, label %L2
diff --git a/test/Transforms/LoopIdiom/pr33114.ll b/test/Transforms/LoopIdiom/pr33114.ll
new file mode 100644
index 000000000000..fa44d8e31e7c
--- /dev/null
+++ b/test/Transforms/LoopIdiom/pr33114.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Check that we're not crashing while looking at the recurrence variable.
+; RUN: opt -S -loop-idiom %s | FileCheck %s
+
+define void @tinkywinky() {
+; CHECK-LABEL: @tinkywinky(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[PH:%.*]]
+; CHECK: ph:
+; CHECK-NEXT: [[MYPHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[PATATINO:%.*]] = ashr i32 [[MYPHI]], undef
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[PATATINO]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[EXIT_LOOPEXIT:%.*]], label [[IF_END]]
+; CHECK: exit.loopexit:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 true, label %exit, label %ph
+
+ph:
+ %myphi = phi i32 [ 1, %entry ]
+ br label %if.end
+
+if.end:
+ %patatino = ashr i32 %myphi, undef
+ %tobool = icmp eq i32 %patatino, 0
+ br i1 %tobool, label %exit, label %if.end
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll b/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
index 3adb8bcf514d..00c3222b0051 100644
--- a/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
@@ -25,7 +25,7 @@ L2: ; preds = %idxend.8
if6: ; preds = %idxend.8
%r2 = add i64 %0, -1
%r3 = load i64, i64* %1, align 8
-; CHECK-NOT: %r2
+; CHECK: %r2 = add i64 %0, -1
; CHECK: %r3 = load i64
br label %ib
@@ -36,13 +36,11 @@ ib: ; preds = %if6
%r4 = mul i64 %r3, %r0
%r5 = add i64 %r2, %r4
%r6 = icmp ult i64 %r5, undef
-; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1
-; CHECK: add i64 %{{.}}, [[ADD1]]
-; CHECK: %r6
+; CHECK: %r4 = mul i64 %r3, %lsr.iv
+; CHECK: %r5 = add i64 %r2, %r4
+; CHECK: %r6 = icmp ult i64 %r5, undef
+; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
%r7 = getelementptr i64, i64* undef, i64 %r5
store i64 1, i64* %r7, align 8
-; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1
br label %L
}
diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
index aa688d999e60..a7731bfcec56 100644
--- a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
@@ -1,5 +1,14 @@
+; REQUIRES: x86
; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; Strength reduction analysis here relies on IV Users analysis, that
+; only finds users among instructions with types that are treated as
+; legal by the data layout. When running this test on pure non-x86
+; configs (for example, ARM 64), it gets confused with the target
+; triple and uses a default data layout instead. This default layout
+; does not have any legal types (even i32), so the transformation
+; does not happen.
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"
@@ -7,16 +16,23 @@ target triple = "x86_64-apple-macosx"
;
; SCEV expander cannot expand quadratic recurrences outside of the
; loop. This recurrence depends on %sub.us, so can't be expanded.
+; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
+; declared after the loop.
;
; CHECK-LABEL: @test2
; CHECK-LABEL: test2.loop:
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
;
; CHECK-LABEL: for.end:
-; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
-; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
-; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
+; CHECK: %1 = sub i32 0, %sub.us
+; CHECK: %2 = add i32 %1, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK: %f = ashr i32 %sext.us, 24
; CHECK: ret i32 %f
define i32 @test2() {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/nonintegral.ll b/test/Transforms/LoopStrengthReduce/nonintegral.ll
new file mode 100644
index 000000000000..5648e3aa74af
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/nonintegral.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; Address Space 10 is non-integral. The optimizer is not allowed to use
+; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+; How exactly SCEV chooses to materialize isn't all that important, as
+; long as it doesn't try to round-trip through integers. As of this writing,
+; it emits a byte-wise gep, which is fine.
+; CHECK: getelementptr i64, i64 addrspace(10)* {{.*}}, i64 {{.*}}
+top:
+ br label %L86
+
+L86: ; preds = %L86, %top
+ %i.0 = phi i64 [ 0, %top ], [ %tmp, %L86 ]
+ %tmp = add i64 %i.0, 1
+ br i1 undef, label %L86, label %if29
+
+if29: ; preds = %L86
+ %tmp1 = shl i64 %tmp, 1
+ %tmp2 = add i64 %tmp1, -2
+ br label %if31
+
+if31: ; preds = %if38, %if29
+ %"#temp#1.sroa.0.022" = phi i64 [ 0, %if29 ], [ %tmp3, %if38 ]
+ br label %L119
+
+L119: ; preds = %L119, %if31
+ %i5.0 = phi i64 [ %"#temp#1.sroa.0.022", %if31 ], [ %tmp3, %L119 ]
+ %tmp3 = add i64 %i5.0, 1
+ br i1 undef, label %L119, label %if38
+
+if38: ; preds = %L119
+ %tmp4 = add i64 %tmp2, %i5.0
+ %tmp5 = getelementptr i64, i64 addrspace(10)* %arg, i64 %tmp4
+ %tmp6 = load i64, i64 addrspace(10)* %tmp5
+ br i1 undef, label %done, label %if31
+
+done: ; preds = %if38
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index fbf55fd81d23..cbf177c0d4b9 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -25,6 +25,8 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2*
entry:
%buffer = alloca [33 x i16], align 16
%add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33
+ %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+ %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64
br label %do.body
do.body: ; preds = %do.body, %entry
@@ -46,8 +48,6 @@ do.body: ; preds = %do.body, %entry
do.end: ; preds = %do.body
%xap.0 = inttoptr i64 %0 to i1*
%cap.0 = ptrtoint i1* %xap.0 to i64
- %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
- %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
%sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
%sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
%conv11 = trunc i64 %sub.ptr.div39 to i32
diff --git a/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
new file mode 100644
index 000000000000..a7f414b8694b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
@@ -0,0 +1,26 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: all_scalar
+; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions
+;
+define void @all_scalar(i64* %a, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %tmp0 = getelementptr i64, i64* %a, i64 %i
+ store i64 0, i64* %tmp0, align 1
+ %i.next = add nuw nsw i64 %i, 2
+ %cond = icmp eq i64 %i.next, %n
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/test/Transforms/LoopVectorize/SystemZ/addressing.ll
new file mode 100644
index 000000000000..1f7a6d29c57c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/SystemZ/addressing.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \
+; RUN: -instcombine -force-vector-width=2 < %s | FileCheck %s
+;
+; Test that loop vectorizer does not generate vector addresses that must then
+; always be extracted.
+
+; Check that the addresses for a scalarized memory access is not extracted
+; from a vector register.
+define i32 @foo(i32* nocapture %A) {
+;CHECK-LABEL: @foo(
+;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK: %0 = shl nsw i64 %index, 2
+;CHECK: %1 = shl i64 %index, 2
+;CHECK: %2 = or i64 %1, 4
+;CHECK: %3 = getelementptr inbounds i32, i32* %A, i64 %0
+;CHECK: %4 = getelementptr inbounds i32, i32* %A, i64 %2
+;CHECK: store i32 4, i32* %3, align 4
+;CHECK: store i32 4, i32* %4, align 4
+
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
+ store i32 4, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 undef
+}
+
+
+; Check that a load of address is scalarized.
+define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
+;CHECK-LABEL: @foo1(
+;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK: %0 = or i64 %index, 1
+;CHECK: %1 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %index
+;CHECK: %2 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %0
+;CHECK: %3 = load i32*, i32** %1, align 8
+;CHECK: %4 = load i32*, i32** %2, align 8
+;CHECK: %5 = load i32, i32* %3, align 4
+;CHECK: %6 = load i32, i32* %4, align 4
+;CHECK: %7 = insertelement <2 x i32> undef, i32 %5, i32 0
+;CHECK: %8 = insertelement <2 x i32> %7, i32 %6, i32 1
+;CHECK: %9 = getelementptr inbounds i32, i32* %A, i64 %index
+;CHECK: %10 = bitcast i32* %9 to <2 x i32>*
+;CHECK: store <2 x i32> %8, <2 x i32>* %10, align 4
+
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv
+ %el = load i32*, i32** %ptr
+ %v = load i32, i32* %el
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ store i32 %v, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index b2933c4b56f2..4dc62d86453f 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -11,38 +11,38 @@
; break;
; }
; }
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized
; void test_disabled(int *A, int Length) {
; #pragma clang loop vectorize(disable) interleave(disable)
; for (int i = 0; i < Length; i++)
; A[i] = i;
; }
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; void test_array_bounds(int *A, int *B, int Length) {
; #pragma clang loop vectorize(enable)
; for (int i = 0; i < Length; i++)
; A[i] = A[B[i]];
; }
-
-; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
; CHECK: remark: source.cpp:19:5: loop not vectorized
; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
-; CHECK: _Z4testPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z13test_disabledPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z17test_array_boundsPiS_i
-; CHECK-NOT: x i32>
-; CHECK: ret
+; int foo();
+; void test_multiple_failures(int *A) {
+; int k = 0;
+; #pragma clang loop vectorize(enable) interleave(enable)
+; for (int i = 0; i < 1000; i+=A[i]) {
+; if (A[i])
+; k = foo();
+; }
+; return k;
+; }
+; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select
+; CHECK: remark: source.cpp:27:3: loop not vectorized
; YAML: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
@@ -98,6 +98,41 @@
; YAML-NEXT: - String: 'loop not vectorized: '
; YAML-NEXT: - String: failed explicitly specified loop vectorization
; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NoCFGForSelect
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 7 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: control flow cannot be substituted for a select
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: CantComputeNumberOfIterations
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: could not determine number of loop iterations
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Missed
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: MissedDetails
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: loop not vectorized
+; YAML-NEXT: ...
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -124,6 +159,10 @@ for.end: ; preds = %for.body, %entry
ret void, !dbg !24
}
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
entry:
@@ -144,6 +183,10 @@ for.end: ; preds = %for.body, %entry
ret void, !dbg !31
}
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
entry:
@@ -174,6 +217,45 @@ for.end: ; preds = %for.end.loopexit, %
ret void, !dbg !36
}
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; Function Attrs: nounwind uwtable
+define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 {
+entry:
+ br label %for.body, !dbg !38
+
+for.body: ; preds = %entry, %for.inc
+ %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+ %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !40
+ %tobool = icmp eq i32 %0, 0, !dbg !40
+ br i1 %tobool, label %for.inc, label %if.then, !dbg !40
+
+if.then: ; preds = %for.body
+ %call = tail call i32 (...) @foo(), !dbg !41
+ %.pre = load i32, i32* %arrayidx, align 4
+ br label %for.inc, !dbg !42
+
+for.inc: ; preds = %for.body, %if.then
+ %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43
+ %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ]
+ %add = add nsw i32 %1, %i.09, !dbg !44
+ %cmp = icmp slt i32 %add, 1000, !dbg !45
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38
+
+for.cond.cleanup: ; preds = %for.inc
+ ret i32 %k.1, !dbg !39
+}
+
+declare i32 @foo(...)
+
+; CHECK: test_multiple_failure
+; CHECK-NOT: x i32>
+; CHECK: ret
+
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
@@ -216,3 +298,13 @@ attributes #0 = { nounwind }
!34 = !{!34, !15}
!35 = !DILocation(line: 19, column: 5, scope: !33)
!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2)
diff --git a/test/Transforms/NewGVN/pr32403.ll b/test/Transforms/NewGVN/pr32403.ll
index 2552e0e66ab9..505d31a9463e 100644
--- a/test/Transforms/NewGVN/pr32403.ll
+++ b/test/Transforms/NewGVN/pr32403.ll
@@ -17,7 +17,8 @@ define void @reorder_ref_pic_list() local_unnamed_addr {
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[REFIDXLX_0]], 1
; CHECK-NEXT: br label [[FOR_BODY8_I:%.*]]
; CHECK: for.body8.i:
-; CHECK-NEXT: br i1 undef, label [[FOR_INC24_I:%.*]], label [[IF_THEN17_I:%.*]]
+; CHECK-NEXT: [[NIDX_052_I:%.*]] = phi i32 [ [[INC_I]], [[IF_THEN13]] ], [ [[NIDX_052_I]], [[FOR_INC24_I:%.*]] ]
+; CHECK-NEXT: br i1 undef, label [[FOR_INC24_I]], label [[IF_THEN17_I:%.*]]
; CHECK: if.then17.i:
; CHECK-NEXT: br label [[FOR_INC24_I]]
; CHECK: for.inc24.i:
diff --git a/test/Transforms/NewGVN/pr32836.ll b/test/Transforms/NewGVN/pr32836.ll
new file mode 100644
index 000000000000..623f216101bf
--- /dev/null
+++ b/test/Transforms/NewGVN/pr32836.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -newgvn %s | FileCheck %s
+
+%struct.anon = type { i32 }
+@b = external global %struct.anon
+define void @tinkywinky(i1 %patatino) {
+; CHECK-LABEL: @tinkywinky(
+; CHECK-NEXT: store i32 8, i32* null
+; CHECK-NEXT: br i1 [[PATATINO:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: br label [[L:%.*]]
+; CHECK: L:
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* null
+; CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i32 [[TMP1]], 536870911
+; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -536870912
+; CHECK-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+; CHECK-NEXT: store i32 [[BF_SET]], i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+; CHECK-NEXT: br label [[LOR_END:%.*]]
+; CHECK: lor.end:
+; CHECK-NEXT: br label [[L]]
+;
+ store i32 8, i32* null
+ br i1 %patatino, label %if.end, label %if.then
+if.then:
+ store i32 8, i32* null
+ br label %L
+L:
+ br label %if.end
+if.end:
+ %tmp1 = load i32, i32* null
+ %bf.load1 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+ %bf.value = and i32 %tmp1, 536870911
+ %bf.clear = and i32 %bf.load1, -536870912
+ %bf.set = or i32 %bf.clear, %bf.value
+ store i32 %bf.set, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+ br label %lor.end
+lor.end:
+ %bf.load4 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+ %tmp4 = and i32 %bf.load4, 536870911
+ %or = or i32 0, %tmp4
+ br label %L
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
index 3ac3c5138ae7..a97e3f81a8ef 100644
--- a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
+++ b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
@@ -382,3 +382,64 @@ loop_exit2:
; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
; CHECK-NEXT: ret i32 %[[R]]
}
+
+; This test, extracted from the LLVM test suite, has an interesting dominator
+; tree to update as there are edges to sibling domtree nodes within child
+; domtree nodes of the unswitched node.
+define void @xgets(i1 %cond1, i1* %cond2.ptr) {
+; CHECK-LABEL: @xgets(
+entry:
+ br label %for.cond.preheader
+; CHECK: entry:
+; CHECK-NEXT: br label %for.cond.preheader
+
+for.cond.preheader:
+ br label %for.cond
+; CHECK: for.cond.preheader:
+; CHECK-NEXT: br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit
+;
+; CHECK: for.cond.preheader.split:
+; CHECK-NEXT: br label %for.cond
+
+for.cond:
+ br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit
+; CHECK: for.cond:
+; CHECK-NEXT: br label %land.lhs.true
+
+land.lhs.true:
+ br label %if.then20
+; CHECK: land.lhs.true:
+; CHECK-NEXT: br label %if.then20
+
+if.then20:
+ %cond2 = load volatile i1, i1* %cond2.ptr
+ br i1 %cond2, label %if.then23, label %if.else
+; CHECK: if.then20:
+; CHECK-NEXT: %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr
+; CHECK-NEXT: br i1 %[[COND2]], label %if.then23, label %if.else
+
+if.else:
+ br label %for.cond
+; CHECK: if.else:
+; CHECK-NEXT: br label %for.cond
+
+if.end17.thread.loopexit:
+ br label %if.end17.thread
+; CHECK: if.end17.thread.loopexit:
+; CHECK-NEXT: br label %if.end17.thread
+
+if.end17.thread:
+ br label %cleanup
+; CHECK: if.end17.thread:
+; CHECK-NEXT: br label %cleanup
+
+if.then23:
+ br label %cleanup
+; CHECK: if.then23:
+; CHECK-NEXT: br label %cleanup
+
+cleanup:
+ ret void
+; CHECK: cleanup:
+; CHECK-NEXT: ret void
+}