diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-29 16:25:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-29 16:25:25 +0000 |
commit | ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch) | |
tree | 568d786a59d49bef961dcb9bd09d422701b9da5b /test/Transforms | |
parent | b5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff) |
Notes
Diffstat (limited to 'test/Transforms')
38 files changed, 2542 insertions, 265 deletions
diff --git a/test/Transforms/Coroutines/coro-debug.ll b/test/Transforms/Coroutines/coro-debug.ll new file mode 100644 index 000000000000..4da545499f94 --- /dev/null +++ b/test/Transforms/Coroutines/coro-debug.ll @@ -0,0 +1,142 @@ +; Tests that debug information is sane after coro-split +; RUN: opt < %s -coro-split -S | FileCheck %s + +source_filename = "simple-repro.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind +define i8* @f(i32 %x) #0 !dbg !6 { +entry: + %x.addr = alloca i32, align 4 + %coro_hdl = alloca i8*, align 8 + store i32 %x, i32* %x.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14 + call void @llvm.dbg.declare(metadata i8** %coro_hdl, metadata !15, metadata !13), !dbg !16 + %0 = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* (i32)* @f to i8*), i8* null), !dbg !16 + %1 = call i64 @llvm.coro.size.i64(), !dbg !16 + %call = call i8* @malloc(i64 %1), !dbg !16 + %2 = call i8* @llvm.coro.begin(token %0, i8* %call) #7, !dbg !16 + store i8* %2, i8** %coro_hdl, align 8, !dbg !16 + %3 = call i8 @llvm.coro.suspend(token none, i1 false), !dbg !17 + %conv = sext i8 %3 to i32, !dbg !17 + call void @coro.devirt.trigger(i8* null) + switch i32 %conv, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + ], !dbg !17 + +sw.bb: ; preds = %entry + br label %sw.epilog, !dbg !18 + +sw.bb1: ; preds = %entry + br label %coro_Cleanup, !dbg !18 + +sw.default: ; preds = %entry + br label %coro_Suspend, !dbg !18 + +sw.epilog: ; preds = %sw.bb + %4 = load i32, i32* %x.addr, align 4, !dbg !20 + %add = add nsw i32 %4, 1, !dbg !21 + store i32 %add, i32* %x.addr, align 4, !dbg !22 + br label %coro_Cleanup, !dbg !23 + +coro_Cleanup: ; preds = %sw.epilog, %sw.bb1 + %5 = load i8*, i8** %coro_hdl, align 8, !dbg !24 + %6 = call i8* @llvm.coro.free(token %0, i8* %5), !dbg !24 + call void @free(i8* %6), !dbg !24 + br 
label %coro_Suspend, !dbg !24 + +coro_Suspend: ; preds = %coro_Cleanup, %sw.default + %7 = call i1 @llvm.coro.end(i8* null, i1 false) #7, !dbg !24 + %8 = load i8*, i8** %coro_hdl, align 8, !dbg !24 + ret i8* %8, !dbg !24 +} + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: argmemonly nounwind readonly +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #2 + +declare i8* @malloc(i64) #3 + +; Function Attrs: nounwind readnone +declare i64 @llvm.coro.size.i64() #4 + +; Function Attrs: nounwind +declare i8* @llvm.coro.begin(token, i8* writeonly) #5 + +; Function Attrs: nounwind +declare i8 @llvm.coro.suspend(token, i1) #5 + +declare void @free(i8*) #3 + +; Function Attrs: argmemonly nounwind readonly +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 + +; Function Attrs: nounwind +declare i1 @llvm.coro.end(i8*, i1) #5 + +; Function Attrs: alwaysinline +define private void @coro.devirt.trigger(i8*) #6 { +entry: + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #2 + +attributes #0 = { noinline nounwind "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { argmemonly nounwind readonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } +attributes #6 = { alwaysinline } +attributes #7 = { noduplicate } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 97b002238b11ff30d94d0516d6a0515db5725fd8) (http://llvm.org/git/llvm.git 0cb060ba567f1aa5b4b04e86665f88e4632b528a)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "<stdin>", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git 97b002238b11ff30d94d0516d6a0515db5725fd8) (http://llvm.org/git/llvm.git 0cb060ba567f1aa5b4b04e86665f88e4632b528a)"} +!6 = distinct !DISubprogram(name: "f", linkageName: "flink", scope: !7, file: !7, line: 55, type: !8, isLocal: false, isDefinition: true, scopeLine: 55, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!7 = !DIFile(filename: "simple-repro.c", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin") +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !7, line: 55, type: !11) +!13 = !DIExpression() +!14 = !DILocation(line: 55, column: 13, scope: !6) +!15 = !DILocalVariable(name: "coro_hdl", scope: !6, file: !7, line: 56, type: !10) +!16 = !DILocation(line: 56, column: 3, scope: !6) +!17 = !DILocation(line: 58, column: 5, scope: !6) +!18 = !DILocation(line: 58, column: 5, scope: !19) +!19 = distinct !DILexicalBlock(scope: 
!6, file: !7, line: 58, column: 5) +!20 = !DILocation(line: 59, column: 9, scope: !6) +!21 = !DILocation(line: 59, column: 10, scope: !6) +!22 = !DILocation(line: 59, column: 7, scope: !6) +!23 = !DILocation(line: 59, column: 5, scope: !6) +!24 = !DILocation(line: 62, column: 3, scope: !6) + +; CHECK: define i8* @f(i32 %x) #0 !dbg ![[ORIG:[0-9]+]] +; CHECK: define internal fastcc void @f.resume(%f.Frame* %FramePtr) #0 !dbg ![[RESUME:[0-9]+]] +; CHECK: define internal fastcc void @f.destroy(%f.Frame* %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]] +; CHECK: define internal fastcc void @f.cleanup(%f.Frame* %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]] + +; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink" +; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[ORIG]] + +; CHECK: ![[RESUME]] = distinct !DISubprogram(name: "f", linkageName: "flink" +; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[RESUME]] + +; CHECK: ![[DESTROY]] = distinct !DISubprogram(name: "f", linkageName: "flink" + +; CHECK: ![[CLEANUP]] = distinct !DISubprogram(name: "f", linkageName: "flink" diff --git a/test/Transforms/Coroutines/coro-frame.ll b/test/Transforms/Coroutines/coro-frame.ll index 001012fcd0c9..826d3a04fa1e 100644 --- a/test/Transforms/Coroutines/coro-frame.ll +++ b/test/Transforms/Coroutines/coro-frame.ll @@ -1,8 +1,11 @@ ; Check that we can handle spills of the result of the invoke instruction ; RUN: opt < %s -coro-split -S | FileCheck %s -define i8* @f() "coroutine.presplit"="1" personality i32 0 { +define i8* @f(i64 %this) "coroutine.presplit"="1" personality i32 0 { entry: + %this.addr = alloca i64 + store i64 %this, i64* %this.addr + %this1 = load i64, i64* %this.addr %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() %alloc = call i8* @malloc(i32 %size) @@ -15,6 +18,7 @@ cont: i8 1, label %cleanup] resume: call double @print(double %r) + call void @print2(i64 %this1) br label %cleanup cleanup: @@ -30,12 
+34,12 @@ pad: } ; See if the float was added to the frame -; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, double } +; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, double } ; See if the float was spilled into the frame ; CHECK-LABEL: @f( ; CHECK: %r = call double @print( -; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 ; CHECK: store double %r, double* %r.spill.addr ; CHECK: ret i8* %hdl @@ -58,4 +62,5 @@ declare i1 @llvm.coro.end(i8*, i1) declare noalias i8* @malloc(i32) declare double @print(double) +declare void @print2(i64) declare void @free(i8*) diff --git a/test/Transforms/Coroutines/coro-materialize.ll b/test/Transforms/Coroutines/coro-materialize.ll new file mode 100644 index 000000000000..95e8a049ad2f --- /dev/null +++ b/test/Transforms/Coroutines/coro-materialize.ll @@ -0,0 +1,52 @@ +; Verifies that we materialize instruction across suspend points +; RUN: opt < %s -coro-split -S | FileCheck %s + +define i8* @f(i32 %n) "coroutine.presplit"="1" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + + %inc1 = add i32 %n, 1 + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume1 + i8 1, label %cleanup] +resume1: + %inc2 = add i32 %inc1, 1 + %sp2 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume2 + i8 1, label %cleanup] + +resume2: + call void @print(i32 %inc1) + call void @print(i32 %inc2) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* 
%hdl +} + +; See that we only spilled one value +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 } +; CHECK-LABEL: @f( + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @print(i32) +declare void @free(i8*) diff --git a/test/Transforms/EarlyCSE/const-speculation.ll b/test/Transforms/EarlyCSE/const-speculation.ll new file mode 100644 index 000000000000..5b7f2f5b6982 --- /dev/null +++ b/test/Transforms/EarlyCSE/const-speculation.ll @@ -0,0 +1,39 @@ +; RUN: opt -early-cse -S %s | FileCheck %s + +%mystruct = type { i32 } + +; @var is global so that *every* GEP argument is Constant. +@var = external global %mystruct + +; Control flow is to make the dominance tree consider the final icmp before it +; gets to simplify the purely constant one (%tst). Since that icmp uses the +; select that gets considered next. Finally the select simplification looks at +; the %tst icmp and we don't want it to speculate about what happens if "i32 0" +; is actually "i32 1", broken universes are automatic UB. +; +; In this case doing the speculation would create an invalid GEP(@var, 0, 1) and +; crash. 
+ +define i1 @test_constant_speculation() { +; CHECK-LABEL: define i1 @test_constant_speculation +entry: + br i1 undef, label %end, label %select + +select: +; CHECK: select: +; CHECK-NOT: icmp +; CHECK-NOT: getelementptr +; CHECK-NOT: select + + %tst = icmp eq i32 1, 0 + %elt = getelementptr %mystruct, %mystruct* @var, i64 0, i32 0 + %sel = select i1 %tst, i32* null, i32* %elt + br label %end + +end: +; CHECK: end: +; CHECK: %tmp = phi i32* [ null, %entry ], [ getelementptr inbounds (%mystruct, %mystruct* @var, i64 0, i32 0), %select ] + %tmp = phi i32* [null, %entry], [%sel, %select] + %res = icmp eq i32* %tmp, null + ret i1 %res +} diff --git a/test/Transforms/GVN/PRE/phi-translate-2.ll b/test/Transforms/GVN/PRE/phi-translate-2.ll new file mode 100644 index 000000000000..b2993657c7f5 --- /dev/null +++ b/test/Transforms/GVN/PRE/phi-translate-2.ll @@ -0,0 +1,105 @@ +; RUN: opt < %s -gvn -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@a = common global [100 x i64] zeroinitializer, align 16 +@b = common global [100 x i64] zeroinitializer, align 16 +@g1 = common global i64 0, align 8 +@g2 = common global i64 0, align 8 +@g3 = common global i64 0, align 8 +declare i64 @goo(...) local_unnamed_addr #1 + +define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) { +entry: + %mul = mul nsw i64 %b, %a + store i64 %mul, i64* @g1, align 8 + %t0 = load i64, i64* @g2, align 8 + %cmp = icmp sgt i64 %t0, 3 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %mul2 = mul nsw i64 %d, %c + store i64 %mul2, i64* @g2, align 8 + br label %if.end + +; Check phi-translate works and mul is removed. 
+; CHECK-LABEL: @test1( +; CHECK: if.end: +; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ] +; CHECK-NOT: = mul +; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8 +if.end: ; preds = %if.then, %entry + %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ] + %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ] + %mul3 = mul nsw i64 %a.addr.0, %b.addr.0 + store i64 %mul3, i64* @g3, align 8 + ret void +} + +define void @test2(i64 %i) { +entry: + %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i + %t0 = load i64, i64* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i + %t1 = load i64, i64* %arrayidx1, align 8 + %mul = mul nsw i64 %t1, %t0 + store i64 %mul, i64* @g1, align 8 + %cmp = icmp sgt i64 %mul, 3 + br i1 %cmp, label %if.then, label %if.end + +; Check phi-translate works for the phi generated by loadpre. A new mul will be +; inserted in if.then block. +; CHECK-LABEL: @test2( +; CHECK: if.then: +; CHECK: %[[MUL_THEN:.*]] = mul +; CHECK: br label %if.end +if.then: ; preds = %entry + %call = tail call i64 (...) @goo() #2 + store i64 %call, i64* @g2, align 8 + br label %if.end + +; CHECK: if.end: +; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ] +; CHECK-NOT: = mul +; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8 +if.end: ; preds = %if.then, %entry + %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ] + %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0 + %t2 = load i64, i64* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0 + %t3 = load i64, i64* %arrayidx4, align 8 + %mul5 = mul nsw i64 %t3, %t2 + store i64 %mul5, i64* @g3, align 8 + ret void +} + +; Check phi-translate doesn't go through backedge, which may lead to incorrect +; pre transformation. 
+; CHECK: for.end: +; CHECK-NOT: %{{.*pre-phi}} = phi +; CHECK: ret void +define void @test3(i64 %N, i64* nocapture readonly %a) { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ] + %add = add nuw nsw i64 %i.0, 1 + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add + %tmp0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp slt i64 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %call = tail call i64 (...) @goo() #2 + %add1 = sub nsw i64 0, %call + %tobool = icmp eq i64 %tmp0, %add1 + br i1 %tobool, label %for.cond, label %for.end + +for.end: ; preds = %for.body, %for.cond + %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ] + %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa + %tmp1 = load i64, i64* %arrayidx2, align 8 + store i64 %tmp1, i64* @g1, align 8 + ret void +} + diff --git a/test/Transforms/GVN/PRE/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll index 9eec8bb6455b..1b2b4d20d31d 100644 --- a/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -37,7 +37,7 @@ sw.bb2: ; preds = %if.end, %entry %3 = load double, double* %arrayidx5, align 8 ; CHECK: sw.bb2: ; CHECK-NOT: sext -; CHECK-NEXT: phi double [ +; CHECK: phi double [ ; CHECK-NOT: load %sub6 = fsub double 3.000000e+00, %3 br label %return diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll index 685df24f62b6..ffff2b7f08e5 100644 --- a/test/Transforms/GVN/PRE/pre-load.ll +++ b/test/Transforms/GVN/PRE/pre-load.ll @@ -72,7 +72,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK-NEXT: phi i32 [ +; CHECK: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } @@ -104,7 +104,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK-NEXT: phi i32 [ +; CHECK: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } @@ -263,7 +263,7 @@ block4: %PRE = load i32, 
i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK-NEXT: phi i32 [ +; CHECK: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } diff --git a/test/Transforms/GVNSink/dither.ll b/test/Transforms/GVNSink/dither.ll new file mode 100644 index 000000000000..9717021aca82 --- /dev/null +++ b/test/Transforms/GVNSink/dither.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -S -gvn-sink | FileCheck %s + +; Because %tmp17 has flipped operands to its equivalents %tmp14 and %tmp7, we +; can't sink the zext as we'd need a shuffling PHI in between. +; +; Just sinking the zext isn't profitable, so ensure nothing is sunk. + +; CHECK-LABEL: @hoge +; CHECK-NOT: bb18.gvnsink.split +define void @hoge() { +bb: + br i1 undef, label %bb4, label %bb11 + +bb4: ; preds = %bb3 + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb5 + %tmp = zext i16 undef to i64 + %tmp7 = add i64 %tmp, undef + br label %bb18 + +bb8: ; preds = %bb5 + %tmp9 = zext i16 undef to i64 + br label %bb18 + +bb11: ; preds = %bb10 + br i1 undef, label %bb12, label %bb15 + +bb12: ; preds = %bb11 + %tmp13 = zext i16 undef to i64 + %tmp14 = add i64 %tmp13, undef + br label %bb18 + +bb15: ; preds = %bb11 + %tmp16 = zext i16 undef to i64 + %tmp17 = add i64 undef, %tmp16 + br label %bb18 + +bb18: ; preds = %bb15, %bb12, %bb8, %bb6 + %tmp19 = phi i64 [ %tmp7, %bb6 ], [ undef, %bb8 ], [ %tmp14, %bb12 ], [ %tmp17, %bb15 ] + unreachable +} diff --git a/test/Transforms/GVNSink/indirect-call.ll b/test/Transforms/GVNSink/indirect-call.ll new file mode 100644 index 000000000000..da98ed0819a6 --- /dev/null +++ b/test/Transforms/GVNSink/indirect-call.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s + +declare i8 @ext(i1) + +define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test1 +; CHECK: call i8 @ext +; CHECK: call i8 %ext +if.then: + 
%frombool1 = call i8 @ext(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 %ext(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test2 +; CHECK: call i8 %ext +; CHECK-NOT: call +if.then: + %frombool1 = call i8 %ext(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 %ext(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +define zeroext i1 @test3(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext1, i8(i1)* %ext2) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test3 +; CHECK: %[[x:.*]] = select i1 %flag, i8 (i1)* %ext1, i8 (i1)* %ext2 +; CHECK: call i8 %[[x]](i1 %cmp) +; CHECK-NOT: call +if.then: + %frombool1 = call i8 %ext1(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 %ext2(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} diff --git a/test/Transforms/GVNSink/sink-common-code.ll b/test/Transforms/GVNSink/sink-common-code.ll new file mode 100644 index 000000000000..d9e757cd10fc --- /dev/null +++ b/test/Transforms/GVNSink/sink-common-code.ll @@ -0,0 +1,694 @@ +; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s + +define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test1 +; CHECK: add +; CHECK: select +; CHECK: icmp +; CHECK-NOT: br +if.then: + %cmp = icmp uge 
i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test2 +; CHECK: add +; CHECK: select +; CHECK: icmp +; CHECK-NOT: br +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + %add = add i32 %nblks, %blksB + %cmp2 = icmp uge i32 %blksA, %add + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +declare i32 @foo(i32, i32) nounwind readnone + +define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone + %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone + br label %if.end + +if.else: + %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone + %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone + br label %if.end + +if.end: + %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ] + %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ] + %ret = add i32 %xx, %yy + ret i32 %ret +} + +; CHECK-LABEL: test3 +; CHECK: select +; CHECK: call +; CHECK: call +; CHECK: add +; CHECK-NOT: br + +define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test4 +; CHECK: select +; CHECK: store +; 
CHECK-NOT: store + +define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test5 +; CHECK: store volatile +; CHECK: store + +define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test6 +; CHECK: select +; CHECK: store volatile +; CHECK-NOT: store + +define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %z = load volatile i32, i32* %y + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test7 +; CHECK-DAG: select +; CHECK-DAG: load volatile +; CHECK: store volatile +; CHECK-NOT: load +; CHECK-NOT: store + +; The extra store in %if.then means %z and %w are not equivalent. +define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + store i32 7, i32* %p + %z = load volatile i32, i32* %y + store i32 6, i32* %p + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test9 +; CHECK: add +; CHECK: add + +%struct.anon = type { i32, i32 } + +; The GEP indexes a struct type so cannot have a variable last index. 
+define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 5 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 + store volatile i32 %x, i32* %gepa + br label %if.end + +if.else: + %dummy1 = add i32 %x, 6 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + store volatile i32 %x, i32* %gepb + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test10 +; CHECK: getelementptr +; CHECK: store volatile +; CHECK: getelementptr +; CHECK: store volatile + +; The shufflevector's mask operand cannot be merged in a PHI. +define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %w, 5 + %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 0, i32 1> + br label %if.end + +if.else: + %dummy1 = add i32 %w, 6 + %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 1, i32 0> + br label %if.end + +if.end: + %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test11 +; CHECK: shufflevector +; CHECK: shufflevector + +; We can't common an intrinsic! +define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %w, 5 + %sv1 = call i32 @llvm.ctlz.i32(i32 %x) + br label %if.end + +if.else: + %dummy1 = add i32 %w, 6 + %sv2 = call i32 @llvm.cttz.i32(i32 %x) + br label %if.end + +if.end: + %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} + +declare i32 @llvm.ctlz.i32(i32 %x) readnone +declare i32 @llvm.cttz.i32(i32 %x) readnone + +; CHECK-LABEL: test12 +; CHECK: call i32 @llvm.ctlz +; CHECK: call i32 @llvm.cttz + +; The TBAA metadata should be properly combined. 
+define i32 @test13(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %z = load volatile i32, i32* %y + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y, !tbaa !3 + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y, !tbaa !4 + br label %if.end + +if.end: + ret i32 1 +} + +!0 = !{ !"an example type tree" } +!1 = !{ !"int", !0 } +!2 = !{ !"float", !0 } +!3 = !{ !"const float", !2, i64 0 } +!4 = !{ !"special float", !2, i64 1 } + +; CHECK-LABEL: test13 +; CHECK-DAG: select +; CHECK-DAG: load volatile +; CHECK: store volatile {{.*}}, !tbaa !0 +; CHECK-NOT: load +; CHECK-NOT: store + +; The call should be commoned. +define i32 @test13a(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %sv1 = call i32 @bar(i32 %x) + br label %if.end + +if.else: + %sv2 = call i32 @bar(i32 %y) + br label %if.end + +if.end: + %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} +declare i32 @bar(i32) + +; CHECK-LABEL: test13a +; CHECK: %[[x:.*]] = select i1 %flag +; CHECK: call i32 @bar(i32 %[[x]]) + +; The load should be commoned. +define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 1 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + %sv1 = load i32, i32* %gepa + %cmp1 = icmp eq i32 %sv1, 56 + br label %if.end + +if.else: + %dummy2 = add i32 %x, 4 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + %sv2 = load i32, i32* %gepb + %cmp2 = icmp eq i32 %sv2, 57 + br label %if.end + +if.end: + %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test14 +; CHECK: getelementptr +; CHECK: load +; CHECK-NOT: load + +; The load should be commoned. 
+define i32 @test15(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 1 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 + %sv1 = load i32, i32* %gepa + %ext1 = zext i32 %sv1 to i64 + %cmp1 = icmp eq i64 %ext1, 56 + br label %if.end + +if.else: + %dummy2 = add i32 %x, 4 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + %sv2 = load i32, i32* %gepb + %ext2 = zext i32 %sv2 to i64 + %cmp2 = icmp eq i64 %ext2, 56 + br label %if.end + +if.end: + %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test15 +; CHECK: getelementptr +; CHECK: load +; CHECK-NOT: load + +define zeroext i1 @test_crash(i1 zeroext %flag, i32* %i4, i32* %m, i32* %n) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %tmp1 = load i32, i32* %i4 + %tmp2 = add i32 %tmp1, -1 + store i32 %tmp2, i32* %i4 + br label %if.end + +if.else: + %tmp3 = load i32, i32* %m + %tmp4 = load i32, i32* %n + %tmp5 = add i32 %tmp3, %tmp4 + store i32 %tmp5, i32* %i4 + br label %if.end + +if.end: + ret i1 true +} + +; CHECK-LABEL: test_crash +; No checks for test_crash - just ensure it doesn't crash! 
+ +define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) { + +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test16 +; CHECK: zext +; CHECK: zext + +define zeroext i1 @test16a(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks, i8* %p) { + +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + %b1 = sext i8 %frombool1 to i32 + %b2 = trunc i32 %b1 to i8 + store i8 %b2, i8* %p + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + %a1 = sext i8 %frombool3 to i32 + %a2 = trunc i32 %a1 to i8 + store i8 %a2, i8* %p + br label %if.end + +if.end: + ret i1 true +} + +; CHECK-LABEL: test16a +; CHECK: zext +; CHECK-NOT: zext + +define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = call i8 @i1toi8(i1 %cmp) + %a1 = sext i8 %frombool1 to i32 + %a2 = trunc i32 %a1 to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = call i8 @i1toi8(i1 %cmp2) + %b1 = sext i8 %frombool3 to i32 + %b2 = trunc i32 %b1 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, 
%if.then2 ], [ 0, %entry ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} +declare i8 @i1toi8(i1) + +; FIXME: DISABLED - we don't consider this profitable. We should +; - Consider argument setup/return mov'ing for calls, like InlineCost does. +; - Consider the removal of the %obeys.0 PHI (zero PHI movement overall) + +; DISABLED-CHECK-LABEL: test17 +; DISABLED-CHECK: if.then: +; DISABLED-CHECK-NEXT: icmp uge +; DISABLED-CHECK-NEXT: br label %[[x:.*]] + +; DISABLED-CHECK: if.then2: +; DISABLED-CHECK-NEXT: add +; DISABLED-CHECK-NEXT: icmp ule +; DISABLED-CHECK-NEXT: br label %[[x]] + +; DISABLED-CHECK: [[x]]: +; DISABLED-CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp +; DISABLED-CHECK-NEXT: %[[z:.*]] = call i8 @i1toi8(i1 %[[y]]) +; DISABLED-CHECK-NEXT: br label %if.end + +; DISABLED-CHECK: if.end: +; DISABLED-CHECK-NEXT: phi i8 +; DISABLED-CHECK-DAG: [ %[[z]], %[[x]] ] +; DISABLED-CHECK-DAG: [ 0, %entry ] + +define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.then3 [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.then3: + %add2 = add i32 %nblks, %blksA + %cmp3 = icmp ule i32 %add2, %blksA + %frombool4 = zext i1 %cmp3 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test18 +; CHECK: if.end: +; CHECK-NEXT: %[[x:.*]] = phi i1 +; CHECK-DAG: [ %cmp, %if.then ] +; CHECK-DAG: [ %cmp2, %if.then2 ] +; CHECK-DAG: [ %cmp3, %if.then3 ] +; CHECK-NEXT: zext i1 %[[x]] to i8 + +; The phi is confusing - both add instructions are used by it, but +; not on their respective unconditional arcs. It should not be +; optimized. 
+define void @test_pr30292(i1 %cond, i1 %cond2, i32 %a, i32 %b) { +entry: + %add1 = add i32 %a, 1 + br label %succ + +one: + br i1 %cond, label %two, label %succ + +two: + call void @g() + %add2 = add i32 %a, 1 + br label %succ + +succ: + %p = phi i32 [ 0, %entry ], [ %add1, %one ], [ %add2, %two ] + br label %one +} +declare void @g() + +; CHECK-LABEL: test_pr30292 +; CHECK: phi i32 [ 0, %entry ], [ %add1, %succ ], [ %add2, %two ] + +define zeroext i1 @test_pr30244(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) { + +entry: + %p = alloca i8 + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + store i8 %frombool1, i8* %p + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + store i8 %frombool3, i8* %p + br label %if.end + +if.end: + ret i1 true +} + +; CHECK-LABEL: @test_pr30244 +; CHECK: store +; CHECK-NOT: store + +define i32 @test_pr30373a(i1 zeroext %flag, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone + %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone + %z0 = lshr i32 %y0, 8 + br label %if.end + +if.else: + %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone + %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone + %z1 = lshr exact i32 %y1, 8 + br label %if.end + +if.end: + %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ] + %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ] + %ret = add i32 %xx, %yy + ret i32 %ret +} + +; CHECK-LABEL: test_pr30373a +; CHECK: lshr +; CHECK-NOT: exact +; CHECK: } + +define i32 @test_pr30373b(i1 zeroext %flag, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone + %y0 = call i32 @foo(i32 %x, i32 1) nounwind 
readnone + %z0 = lshr exact i32 %y0, 8 + br label %if.end + +if.else: + %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone + %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone + %z1 = lshr i32 %y1, 8 + br label %if.end + +if.end: + %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ] + %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ] + %ret = add i32 %xx, %yy + ret i32 %ret +} + +; CHECK-LABEL: test_pr30373b +; CHECK: lshr +; CHECK-NOT: exact +; CHECK: } + +; CHECK: !0 = !{!1, !1, i64 0} +; CHECK: !1 = !{!"float", !2} +; CHECK: !2 = !{!"an example type tree"} diff --git a/test/Transforms/GVNSink/struct.ll b/test/Transforms/GVNSink/struct.ll new file mode 100644 index 000000000000..2228cf2803ae --- /dev/null +++ b/test/Transforms/GVNSink/struct.ll @@ -0,0 +1,71 @@ +; RUN: opt -gvn-sink -S < %s | FileCheck %s + +%struct = type {i32, i32} +%struct2 = type { [ 2 x i32], i32 } + +; Struct indices cannot be variant. + +; CHECK-LABEL: @f() { +; CHECK: getelementptr +; CHECK: getelementptr +define void @f() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = getelementptr inbounds %struct, %struct* null, i64 0, i32 1 + br label %bb4 + +bb2: ; preds = %bb + %tmp3 = getelementptr inbounds %struct, %struct* null, i64 0, i32 0 + br label %bb4 + +bb4: ; preds = %bb2, %bb1 + %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ] + ret void +} + +; Struct indices cannot be variant. + +; CHECK-LABEL: @g() { +; CHECK: getelementptr +; CHECK: getelementptr +define void @g() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 1 + br label %bb4 + +bb2: ; preds = %bb + %tmp3 = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 0 + br label %bb4 + +bb4: ; preds = %bb2, %bb1 + %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ] + ret void +} + + +; ... but the first parameter to a GEP can. 
+ +; CHECK-LABEL: @h() { +; CHECK: getelementptr +; CHECK-NOT: getelementptr +define void @h() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = getelementptr inbounds %struct, %struct* null, i32 0, i32 0 + br label %bb4 + +bb2: ; preds = %bb + %tmp3 = getelementptr inbounds %struct, %struct* null, i32 1, i32 0 + br label %bb4 + +bb4: ; preds = %bb2, %bb1 + %tmp5 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ] + ret void +}
\ No newline at end of file diff --git a/test/Transforms/GlobalDCE/externally_available.ll b/test/Transforms/GlobalDCE/externally_available.ll index fca49b29ec8e..bc54db38cee0 100644 --- a/test/Transforms/GlobalDCE/externally_available.ll +++ b/test/Transforms/GlobalDCE/externally_available.ll @@ -1,12 +1,21 @@ ; RUN: opt < %s -globaldce -S | FileCheck %s +; test_global should not be emitted to the .s file. +; CHECK-NOT: @test_global = +@test_global = available_externally global i32 4 + +; test_global2 is a normal global using an available externally function. +; CHECK: @test_global2 = +@test_global2 = global i32 ()* @test_function2 + ; test_function should not be emitted to the .s file. -; CHECK-NOT: test_function +; CHECK-NOT: define {{.*}} @test_function() define available_externally i32 @test_function() { ret i32 4 } -; test_global should not be emitted to the .s file. -; CHECK-NOT: test_global -@test_global = available_externally global i32 4 - +; test_function2 isn't actually dead even though it's available externally. +; CHECK: define available_externally i32 @test_function2() +define available_externally i32 @test_function2() { + ret i32 4 +} diff --git a/test/Transforms/Inline/prof-update-instr.ll b/test/Transforms/Inline/prof-update-instr.ll new file mode 100644 index 000000000000..6650165cb904 --- /dev/null +++ b/test/Transforms/Inline/prof-update-instr.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s +; Checks if inliner updates VP metadata for indrect call instructions +; with instrumentation based profile. + +@func = global void ()* null +@func2 = global void ()* null + +; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]] +define void @callee(i32 %n) !prof !15 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false, !prof !20 +cond_true: +; f2 is optimized away, thus not updated. 
+ %f2 = load void ()*, void ()** @func2 +; CHECK: call void %f2(), !prof ![[COUNT_IND_CALLEE1:[0-9]*]] + call void %f2(), !prof !19 + ret void +cond_false: + %f = load void ()*, void ()** @func +; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] + call void %f(), !prof !18 + ret void +} + +; CHECK: define void @caller() +define void @caller() !prof !21 { +; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] + call void @callee(i32 15) + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 2000} +!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +!19 = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40} +!20 = !{!"branch_weights", i32 1000, i32 1000} +!21 = !{!"function_entry_count", i64 400} +attributes #0 = { alwaysinline } +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} +; CHECK: ![[COUNT_IND_CALLEE1]] = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40} +; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12} +; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8} diff --git a/test/Transforms/Inline/prof-update.ll b/test/Transforms/Inline/prof-update-sample.ll index 4a4471e8e17a..4a4471e8e17a 100644 --- a/test/Transforms/Inline/prof-update.ll +++ b/test/Transforms/Inline/prof-update-sample.ll diff --git 
a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll b/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll deleted file mode 100644 index 76e30399a666..000000000000 --- a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: opt < %s -instcombine -S | not grep add -; RUN: opt < %s -instcombine -S | not grep mul -; PR2330 - -define i1 @f(i32 %x, i32 %y) nounwind { -entry: - %A = add i32 %x, 5 - %B = add i32 %y, 5 - %C = icmp eq i32 %A, %B - ret i1 %C -} - -define i1 @g(i32 %x, i32 %y) nounwind { -entry: - %A = mul i32 %x, 5 - %B = mul i32 %y, 5 - %C = icmp eq i32 %A, %B - ret i1 %C -} diff --git a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll b/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll deleted file mode 100644 index b91457c79dea..000000000000 --- a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll +++ /dev/null @@ -1,87 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s - -define i1 @test1(i8 %x, i8 %y) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP:%.*]] = icmp ult i8 %x, %y -; CHECK-NEXT: ret i1 [[TMP]] -; - %X = xor i8 %x, 128 - %Y = xor i8 %y, 128 - %tmp = icmp slt i8 %X, %Y - ret i1 %tmp -} - -define i1 @test2(i8 %x, i8 %y) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: [[TMP:%.*]] = icmp slt i8 %x, %y -; CHECK-NEXT: ret i1 [[TMP]] -; - %X = xor i8 %x, 128 - %Y = xor i8 %y, 128 - %tmp = icmp ult i8 %X, %Y - ret i1 %tmp -} - -define i1 @test3(i8 %x) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i8 %x, -114 -; CHECK-NEXT: ret i1 [[TMP]] -; - %X = xor i8 %x, 128 - %tmp = icmp uge i8 %X, 15 - ret i1 %tmp -} - -define <2 x i1> @test3vec(<2 x i8> %x) { -; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[TMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114> -; CHECK-NEXT: ret <2 x i1> [[TMP]] -; - %X = xor <2 x i8> %x, <i8 128, i8 128> - %tmp = icmp uge <2 x i8> %X, <i8 15, i8 15> - ret <2 x i1> %tmp -} - -define i1 
@test4(i8 %x, i8 %y) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP:%.*]] = icmp ugt i8 %x, %y -; CHECK-NEXT: ret i1 [[TMP]] -; - %X = xor i8 %x, 127 - %Y = xor i8 %y, 127 - %tmp = icmp slt i8 %X, %Y - ret i1 %tmp -} - -define i1 @test5(i8 %x, i8 %y) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i8 %x, %y -; CHECK-NEXT: ret i1 [[TMP]] -; - %X = xor i8 %x, 127 - %Y = xor i8 %y, 127 - %tmp = icmp ult i8 %X, %Y - ret i1 %tmp -} - -define i1 @test6(i8 %x) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP:%.*]] = icmp slt i8 %x, 113 -; CHECK-NEXT: ret i1 [[TMP]] -; - %X = xor i8 %x, 127 - %tmp = icmp uge i8 %X, 15 - ret i1 %tmp -} - -define <2 x i1> @test6vec(<2 x i8> %x) { -; CHECK-LABEL: @test6vec( -; CHECK-NEXT: [[TMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113> -; CHECK-NEXT: ret <2 x i1> [[TMP]] -; - %X = xor <2 x i8> %x, <i8 127, i8 127> - %tmp = icmp uge <2 x i8> %X, <i8 15, i8 15> - ret <2 x i1> %tmp -} - diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll index f81f700e6cf4..490830af2d82 100644 --- a/test/Transforms/InstCombine/alloca.ll +++ b/test/Transforms/InstCombine/alloca.ll @@ -51,8 +51,8 @@ define i32* @test4(i32 %n) { ret i32* %A } -; Allocas which are only used by GEPs, bitcasts, and stores (transitively) -; should be deleted. +; Allocas which are only used by GEPs, bitcasts, addrspacecasts, and stores +; (transitively) should be deleted. 
define void @test5() { ; CHECK-LABEL: @test5( ; CHECK-NOT: alloca @@ -62,6 +62,7 @@ define void @test5() { entry: %a = alloca { i32 } %b = alloca i32* + %c = alloca i32 %a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0 store i32 123, i32* %a.1 store i32* %a.1, i32** %b @@ -73,6 +74,8 @@ entry: store atomic i32 3, i32* %a.3 release, align 4 %a.4 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0 store atomic i32 4, i32* %a.4 seq_cst, align 4 + %c.1 = addrspacecast i32* %c to i32 addrspace(1)* + store i32 123, i32 addrspace(1)* %c.1 ret void } diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll index 97145221099e..a92a7b73fd7e 100644 --- a/test/Transforms/InstCombine/bitcast-vec-canon.ll +++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll @@ -1,41 +1,40 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s define double @a(<1 x i64> %y) { +; CHECK-LABEL: @a( +; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> %y to <1 x double> +; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0 +; CHECK-NEXT: ret double [[C]] +; %c = bitcast <1 x i64> %y to double ret double %c - -; CHECK-LABEL: @a( -; CHECK-NEXT: bitcast <1 x i64> %y to <1 x double> -; CHECK-NEXT: extractelement <1 x double> {{.*}}, i32 0 -; CHECK-NEXT: ret double } define i64 @b(<1 x i64> %y) { +; CHECK-LABEL: @b( +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0 +; CHECK-NEXT: ret i64 [[TMP1]] +; %c = bitcast <1 x i64> %y to i64 ret i64 %c - -; CHECK-LABEL: @b( -; CHECK-NEXT: extractelement <1 x i64> %y, i32 0 -; CHECK-NEXT: ret i64 } define <1 x i64> @c(double %y) { +; CHECK-LABEL: @c( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double %y to i64 +; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 +; CHECK-NEXT: ret <1 x i64> [[C]] +; %c = bitcast double %y to <1 x i64> ret <1 x i64> %c - -; CHECK-LABEL: @c( -; CHECK-NEXT: bitcast double %y to i64 -; CHECK-NEXT: insertelement <1 x i64> undef, i64 
{{.*}}, i32 0 -; CHECK-NEXT: ret <1 x i64> } define <1 x i64> @d(i64 %y) { +; CHECK-LABEL: @d( +; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 %y, i32 0 +; CHECK-NEXT: ret <1 x i64> [[C]] +; %c = bitcast i64 %y to <1 x i64> ret <1 x i64> %c - -; CHECK-LABEL: @d( -; CHECK-NEXT: insertelement <1 x i64> undef, i64 %y, i32 0 -; CHECK-NEXT: ret <1 x i64> } - diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll index 2e7f30fee14d..4cf3f27ab014 100644 --- a/test/Transforms/InstCombine/bitcast.ll +++ b/test/Transforms/InstCombine/bitcast.ll @@ -70,6 +70,51 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { ret <2 x i32> %t2 } +; PR26702 - https://bugs.llvm.org//show_bug.cgi?id=26702 +; Bitcast is canonicalized below logic, so we can see the not-not pattern. + +define <2 x i64> @is_negative(<4 x i32> %x) { +; CHECK-LABEL: @is_negative( +; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> +; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[LOBIT]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[NOTNOT]] +; + %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> + %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1> + %bc = bitcast <4 x i32> %not to <2 x i64> + %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1> + ret <2 x i64> %notnot +} + +; This variation has an extra bitcast at the end. This means that the 2nd xor +; can be done in <4 x i32> to eliminate a bitcast regardless of canonicalizaion. 
+ +define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { +; CHECK-LABEL: @is_negative_bonus_bitcast( +; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> +; CHECK-NEXT: ret <4 x i32> [[LOBIT]] +; + %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> + %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1> + %bc = bitcast <4 x i32> %not to <2 x i64> + %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1> + %bc2 = bitcast <2 x i64> %notnot to <4 x i32> + ret <4 x i32> %bc2 +} + +; Negative test: bitcasts are canonicalized below bitwise logic. No changes here. + +define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) { +; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant( +; CHECK-NEXT: [[A:%.*]] = and <4 x i4> %x, <i4 0, i4 -8, i4 0, i4 -8> +; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i4> [[A]] to <2 x i8> +; CHECK-NEXT: ret <2 x i8> [[B]] +; + %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8> + %b = bitcast <4 x i4> %a to <2 x i8> + ret <2 x i8> %b +} + ; PR27925 - https://llvm.org/bugs/show_bug.cgi?id=27925 define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) { diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll index 38612c92aaa4..6bc6f9731979 100644 --- a/test/Transforms/InstCombine/ctpop.ll +++ b/test/Transforms/InstCombine/ctpop.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -instcombine | FileCheck %s declare i32 @llvm.ctpop.i32(i32) @@ -5,8 +6,9 @@ declare i8 @llvm.ctpop.i8(i8) declare void @llvm.assume(i1) define i1 @test1(i32 %arg) { -; CHECK: @test1 -; CHECK: ret i1 false +; CHECK-LABEL: @test1( +; CHECK-NEXT: ret i1 false +; %and = and i32 %arg, 15 %cnt = call i32 @llvm.ctpop.i32(i32 %and) %res = icmp eq i32 %cnt, 9 @@ -14,8 +16,9 @@ define i1 @test1(i32 %arg) { } define i1 @test2(i32 %arg) { -; CHECK: @test2 -; CHECK: ret i1 false +; CHECK-LABEL: @test2( +; CHECK-NEXT: ret i1 false 
+; %and = and i32 %arg, 1 %cnt = call i32 @llvm.ctpop.i32(i32 %and) %res = icmp eq i32 %cnt, 2 @@ -23,9 +26,12 @@ define i1 @test2(i32 %arg) { } define i1 @test3(i32 %arg) { -; CHECK: @test3 -; CHECK: ret i1 false - ;; Use an assume to make all the bits known without triggering constant +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[ASSUME:%.*]] = icmp eq i32 [[ARG:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) +; CHECK-NEXT: ret i1 false +; + ;; Use an assume to make all the bits known without triggering constant ;; folding. This is trying to hit a corner case where we have to avoid ;; taking the log of 0. %assume = icmp eq i32 %arg, 0 @@ -37,8 +43,11 @@ define i1 @test3(i32 %arg) { ; Negative test for when we know nothing define i1 @test4(i8 %arg) { -; CHECK: @test4 -; CHECK: ret i1 %res +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2 +; CHECK-NEXT: ret i1 [[RES]] +; %cnt = call i8 @llvm.ctpop.i8(i8 %arg) %res = icmp eq i8 %cnt, 2 ret i1 %res diff --git a/test/Transforms/InstCombine/icmp-xor-signbit.ll b/test/Transforms/InstCombine/icmp-xor-signbit.ll new file mode 100644 index 000000000000..30a9668f37df --- /dev/null +++ b/test/Transforms/InstCombine/icmp-xor-signbit.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b + +define i1 @slt_to_ult(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_to_ult( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %x, %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 128 + %b = xor i8 %y, 128 + %cmp = icmp slt i8 %a, %b + ret i1 %cmp +} + +; PR33138 - https://bugs.llvm.org/show_bug.cgi?id=33138 + +define <2 x i1> @slt_to_ult_splat(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @slt_to_ult_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, %y +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = 
xor <2 x i8> %x, <i8 128, i8 128> + %b = xor <2 x i8> %y, <i8 128, i8 128> + %cmp = icmp slt <2 x i8> %a, %b + ret <2 x i1> %cmp +} + +; Make sure that unsigned -> signed works too. + +define i1 @ult_to_slt(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_to_slt( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 128 + %b = xor i8 %y, 128 + %cmp = icmp ult i8 %a, %b + ret i1 %cmp +} + +define <2 x i1> @ult_to_slt_splat(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @ult_to_slt_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, %y +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 128, i8 128> + %b = xor <2 x i8> %y, <i8 128, i8 128> + %cmp = icmp ult <2 x i8> %a, %b + ret <2 x i1> %cmp +} + +; icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b + +define i1 @slt_to_ugt(i8 %x, i8 %y) { +; CHECK-LABEL: @slt_to_ugt( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %x, %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 127 + %b = xor i8 %y, 127 + %cmp = icmp slt i8 %a, %b + ret i1 %cmp +} + +define <2 x i1> @slt_to_ugt_splat(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @slt_to_ugt_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, %y +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 127, i8 127> + %b = xor <2 x i8> %y, <i8 127, i8 127> + %cmp = icmp slt <2 x i8> %a, %b + ret <2 x i1> %cmp +} + +; Make sure that unsigned -> signed works too. 
+ +define i1 @ult_to_sgt(i8 %x, i8 %y) { +; CHECK-LABEL: @ult_to_sgt( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, %y +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 127 + %b = xor i8 %y, 127 + %cmp = icmp ult i8 %a, %b + ret i1 %cmp +} + +define <2 x i1> @ult_to_sgt_splat(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @ult_to_sgt_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, %y +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 127, i8 127> + %b = xor <2 x i8> %y, <i8 127, i8 127> + %cmp = icmp ult <2 x i8> %a, %b + ret <2 x i1> %cmp +} + +; icmp u/s (a ^ signmask), C --> icmp s/u a, C' + +define i1 @sge_to_ugt(i8 %x) { +; CHECK-LABEL: @sge_to_ugt( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %x, -114 +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 128 + %cmp = icmp sge i8 %a, 15 + ret i1 %cmp +} + +define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) { +; CHECK-LABEL: @sge_to_ugt_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, <i8 -114, i8 -114> +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 128, i8 128> + %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15> + ret <2 x i1> %cmp +} + +; Make sure that unsigned -> signed works too. 
+ +define i1 @uge_to_sgt(i8 %x) { +; CHECK-LABEL: @uge_to_sgt( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -114 +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 128 + %cmp = icmp uge i8 %a, 15 + ret i1 %cmp +} + +define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) { +; CHECK-LABEL: @uge_to_sgt_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114> +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 128, i8 128> + %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15> + ret <2 x i1> %cmp +} + +; icmp u/s (a ^ maxsignval), C --> icmp s/u' a, C' + +define i1 @sge_to_ult(i8 %x) { +; CHECK-LABEL: @sge_to_ult( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %x, 113 +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 127 + %cmp = icmp sge i8 %a, 15 + ret i1 %cmp +} + +define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) { +; CHECK-LABEL: @sge_to_ult_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, <i8 113, i8 113> +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 127, i8 127> + %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15> + ret <2 x i1> %cmp +} + +; Make sure that unsigned -> signed works too. + +define i1 @uge_to_slt(i8 %x) { +; CHECK-LABEL: @uge_to_slt( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 113 +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = xor i8 %x, 127 + %cmp = icmp uge i8 %a, 15 + ret i1 %cmp +} + +define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) { +; CHECK-LABEL: @uge_to_slt_splat( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113> +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %a = xor <2 x i8> %x, <i8 127, i8 127> + %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15> + ret <2 x i1> %cmp +} + +; PR33138, part 2: https://bugs.llvm.org/show_bug.cgi?id=33138 +; TODO: We could look through vector bitcasts for icmp folds, +; or we could canonicalize bitcast ahead of logic ops with constants. 
+ +define <8 x i1> @sgt_to_ugt_bitcasted_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @sgt_to_ugt_bitcasted_splat( +; CHECK-NEXT: [[A:%.*]] = xor <2 x i32> %x, <i32 -2139062144, i32 -2139062144> +; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> %y, <i32 -2139062144, i32 -2139062144> +; CHECK-NEXT: [[C:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +; CHECK-NEXT: [[D:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> +; CHECK-NEXT: [[E:%.*]] = icmp sgt <8 x i8> [[C]], [[D]] +; CHECK-NEXT: ret <8 x i1> [[E]] +; + %a = xor <2 x i32> %x, <i32 2155905152, i32 2155905152> ; 0x80808080 + %b = xor <2 x i32> %y, <i32 2155905152, i32 2155905152> + %c = bitcast <2 x i32> %a to <8 x i8> + %d = bitcast <2 x i32> %b to <8 x i8> + %e = icmp sgt <8 x i8> %c, %d + ret <8 x i1> %e +} + +; TODO: This is false (little-endian). How should that be recognized? +; Ie, should InstSimplify know this directly, should InstCombine canonicalize +; this so InstSimplify can know this, or is that not something that we want +; either pass to recognize? 
+ +define <2 x i1> @negative_simplify_splat(<4 x i8> %x) { +; CHECK-LABEL: @negative_simplify_splat( +; CHECK-NEXT: [[A:%.*]] = or <4 x i8> %x, <i8 0, i8 -128, i8 0, i8 -128> +; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i8> [[A]] to <2 x i16> +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[B]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %a = or <4 x i8> %x, <i8 0, i8 128, i8 0, i8 128> + %b = bitcast <4 x i8> %a to <2 x i16> + %c = icmp sgt <2 x i16> %b, zeroinitializer + ret <2 x i1> %c +} + diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll index 6f657b190454..ed570da73c9e 100644 --- a/test/Transforms/InstCombine/icmp.ll +++ b/test/Transforms/InstCombine/icmp.ll @@ -2895,3 +2895,67 @@ define i1 @cmp_ult_rhs_dec(float %x, i32 %y) { %cmp = icmp ult i32 %conv, %dec ret i1 %cmp } + +define i1 @eq_add_constants(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_add_constants( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 %x, %y +; CHECK-NEXT: ret i1 [[C]] +; + %A = add i32 %x, 5 + %B = add i32 %y, 5 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @eq_mul_constants(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_mul_constants( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 %x, %y +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 5 + %B = mul i32 %y, 5 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @eq_mul_constants_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> %x, %y +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, <i32 5, i32 5> + %B = mul <2 x i32> %y, <i32 5, i32 5> + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +; If the multiply constant has any trailing zero bits, we get something completely different. 
+; We mask off the high bits of each input and then convert: +; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + +define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %x, %y +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 12 + %B = mul i32 %y, 12 + %C = icmp ne i32 %A, %B + ret i1 %C +} + +define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> %x, %y +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823> +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, <i32 12, i32 12> + %B = mul <2 x i32> %y, <i32 12, i32 12> + %C = icmp eq <2 x i32> %A, %B + ret <2 x i1> %C +} + diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll index 988ec2b71c50..68daac65ee6b 100644 --- a/test/Transforms/InstSimplify/call.ll +++ b/test/Transforms/InstSimplify/call.ll @@ -1,64 +1,212 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instsimplify -S | FileCheck %s ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) +declare {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b) +declare {i8, i1} @llvm.smul.with.overflow.i8(i8 %a, i8 %b) define i1 @test_uadd1() { ; CHECK-LABEL: @test_uadd1( +; CHECK-NEXT: ret i1 true +; %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3) %overflow = extractvalue {i8, i1} %x, 1 ret i1 %overflow -; CHECK-NEXT: ret i1 true } define i8 
@test_uadd2() { ; CHECK-LABEL: @test_uadd2( +; CHECK-NEXT: ret i8 42 +; %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44) %result = extractvalue {i8, i1} %x, 0 ret i8 %result -; CHECK-NEXT: ret i8 42 +} + +define {i8, i1} @test_uadd3(i8 %v) { +; CHECK-LABEL: @test_uadd3( +; CHECK-NEXT: ret { i8, i1 } undef +; + %result = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %v, i8 undef) + ret {i8, i1} %result +} + +define {i8, i1} @test_uadd4(i8 %v) { +; CHECK-LABEL: @test_uadd4( +; CHECK-NEXT: ret { i8, i1 } undef +; + %result = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 %v) + ret {i8, i1} %result +} + +define i1 @test_sadd1() { +; CHECK-LABEL: @test_sadd1( +; CHECK-NEXT: ret i1 true +; + %x = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 126, i8 3) + %overflow = extractvalue {i8, i1} %x, 1 + ret i1 %overflow +} + +define i8 @test_sadd2() { +; CHECK-LABEL: @test_sadd2( +; CHECK-NEXT: ret i8 -86 +; + %x = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 126, i8 44) + %result = extractvalue {i8, i1} %x, 0 + ret i8 %result +} + +define {i8, i1} @test_sadd3(i8 %v) { +; CHECK-LABEL: @test_sadd3( +; CHECK-NEXT: ret { i8, i1 } undef +; + %result = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v, i8 undef) + ret {i8, i1} %result +} + +define {i8, i1} @test_sadd4(i8 %v) { +; CHECK-LABEL: @test_sadd4( +; CHECK-NEXT: ret { i8, i1 } undef +; + %result = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 %v) + ret {i8, i1} %result } define {i8, i1} @test_usub1(i8 %V) { ; CHECK-LABEL: @test_usub1( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %V, i8 %V) ret {i8, i1} %x -; CHECK-NEXT: ret { i8, i1 } zeroinitializer +} + +define {i8, i1} @test_usub2(i8 %V) { +; CHECK-LABEL: @test_usub2( +; CHECK-NEXT: ret { i8, i1 } undef +; + %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %V, i8 undef) + ret {i8, i1} %x +} + +define {i8, i1} @test_usub3(i8 %V) { +; CHECK-LABEL: @test_usub3( +; CHECK-NEXT: ret { i8, i1 } undef 
+; + %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 %V) + ret {i8, i1} %x } define {i8, i1} @test_ssub1(i8 %V) { ; CHECK-LABEL: @test_ssub1( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %V, i8 %V) ret {i8, i1} %x -; CHECK-NEXT: ret { i8, i1 } zeroinitializer +} + +define {i8, i1} @test_ssub2(i8 %V) { +; CHECK-LABEL: @test_ssub2( +; CHECK-NEXT: ret { i8, i1 } undef +; + %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %V, i8 undef) + ret {i8, i1} %x +} + +define {i8, i1} @test_ssub3(i8 %V) { +; CHECK-LABEL: @test_ssub3( +; CHECK-NEXT: ret { i8, i1 } undef +; + %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 %V) + ret {i8, i1} %x } define {i8, i1} @test_umul1(i8 %V) { ; CHECK-LABEL: @test_umul1( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %V, i8 0) ret {i8, i1} %x -; CHECK-NEXT: ret { i8, i1 } zeroinitializer +} + +define {i8, i1} @test_umul2(i8 %V) { +; CHECK-LABEL: @test_umul2( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %V, i8 undef) + ret {i8, i1} %x +} + +define {i8, i1} @test_umul3(i8 %V) { +; CHECK-LABEL: @test_umul3( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 0, i8 %V) + ret {i8, i1} %x +} + +define {i8, i1} @test_umul4(i8 %V) { +; CHECK-LABEL: @test_umul4( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 %V) + ret {i8, i1} %x +} + +define {i8, i1} @test_smul1(i8 %V) { +; CHECK-LABEL: @test_smul1( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %V, i8 0) + ret {i8, i1} %x +} + +define {i8, i1} @test_smul2(i8 %V) { +; CHECK-LABEL: @test_smul2( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %V, i8 undef) + ret {i8, i1} %x +} + +define {i8, i1} 
@test_smul3(i8 %V) { +; CHECK-LABEL: @test_smul3( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 0, i8 %V) + ret {i8, i1} %x +} + +define {i8, i1} @test_smul4(i8 %V) { +; CHECK-LABEL: @test_smul4( +; CHECK-NEXT: ret { i8, i1 } zeroinitializer +; + %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 %V) + ret {i8, i1} %x } declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef) define i256 @test_cttz() { ; CHECK-LABEL: @test_cttz( +; CHECK-NEXT: ret i256 1 +; %x = call i256 @llvm.cttz.i256(i256 10, i1 false) ret i256 %x -; CHECK-NEXT: ret i256 1 } declare i256 @llvm.ctpop.i256(i256 %src) define i256 @test_ctpop() { ; CHECK-LABEL: @test_ctpop( +; CHECK-NEXT: ret i256 2 +; %x = call i256 @llvm.ctpop.i256(i256 10) ret i256 %x -; CHECK-NEXT: ret i256 2 } ; Test a non-intrinsic that we know about as a library call. @@ -66,14 +214,15 @@ declare float @fabs(float %x) define float @test_fabs_libcall() { ; CHECK-LABEL: @test_fabs_libcall( +; CHECK-NEXT: [[X:%.*]] = call float @fabs(float -4.200000e+01) +; CHECK-NEXT: ret float 4.200000e+01 +; %x = call float @fabs(float -42.0) ; This is still a real function call, so instsimplify won't nuke it -- other ; passes have to do that. 
-; CHECK-NEXT: call float @fabs ret float %x -; CHECK-NEXT: ret float 4.2{{0+}}e+01 } @@ -87,34 +236,35 @@ declare float @llvm.nearbyint.f32(float) nounwind readnone ; Test idempotent intrinsics define float @test_idempotence(float %a) { ; CHECK-LABEL: @test_idempotence( +; CHECK-NEXT: [[A0:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]]) +; CHECK-NEXT: [[B0:%.*]] = call float @llvm.floor.f32(float [[A]]) +; CHECK-NEXT: [[C0:%.*]] = call float @llvm.ceil.f32(float [[A]]) +; CHECK-NEXT: [[D0:%.*]] = call float @llvm.trunc.f32(float [[A]]) +; CHECK-NEXT: [[E0:%.*]] = call float @llvm.rint.f32(float [[A]]) +; CHECK-NEXT: [[F0:%.*]] = call float @llvm.nearbyint.f32(float [[A]]) +; CHECK-NEXT: [[R0:%.*]] = fadd float [[A0]], [[B0]] +; CHECK-NEXT: [[R1:%.*]] = fadd float [[R0]], [[C0]] +; CHECK-NEXT: [[R2:%.*]] = fadd float [[R1]], [[D0]] +; CHECK-NEXT: [[R3:%.*]] = fadd float [[R2]], [[E0]] +; CHECK-NEXT: [[R4:%.*]] = fadd float [[R3]], [[F0]] +; CHECK-NEXT: ret float [[R4]] +; -; CHECK: fabs -; CHECK-NOT: fabs %a0 = call float @llvm.fabs.f32(float %a) %a1 = call float @llvm.fabs.f32(float %a0) -; CHECK: floor -; CHECK-NOT: floor %b0 = call float @llvm.floor.f32(float %a) %b1 = call float @llvm.floor.f32(float %b0) -; CHECK: ceil -; CHECK-NOT: ceil %c0 = call float @llvm.ceil.f32(float %a) %c1 = call float @llvm.ceil.f32(float %c0) -; CHECK: trunc -; CHECK-NOT: trunc %d0 = call float @llvm.trunc.f32(float %a) %d1 = call float @llvm.trunc.f32(float %d0) -; CHECK: rint -; CHECK-NOT: rint %e0 = call float @llvm.rint.f32(float %a) %e1 = call float @llvm.rint.f32(float %e0) -; CHECK: nearbyint -; CHECK-NOT: nearbyint %f0 = call float @llvm.nearbyint.f32(float %a) %f1 = call float @llvm.nearbyint.f32(float %f0) @@ -128,6 +278,17 @@ define float @test_idempotence(float %a) { } define i8* @operator_new() { +; CHECK-LABEL: @operator_new( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @_Znwm(i64 8) +; CHECK-NEXT: br i1 false, label [[CAST_END:%.*]], 
label [[CAST_NOTNULL:%.*]] +; CHECK: cast.notnull: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4 +; CHECK-NEXT: br label [[CAST_END]] +; CHECK: cast.end: +; CHECK-NEXT: [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i8* [[CAST_RESULT]] +; entry: %call = tail call noalias i8* @_Znwm(i64 8) %cmp = icmp eq i8* %call, null @@ -141,8 +302,6 @@ cast.end: ; preds = %cast.notnull, %entr %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ] ret i8* %cast.result -; CHECK-LABEL: @operator_new -; CHECK: br i1 false, label %cast.end, label %cast.notnull } declare nonnull noalias i8* @_Znwm(i64) @@ -151,6 +310,18 @@ declare nonnull noalias i8* @_Znwm(i64) @_ZSt7nothrow = external global %"struct.std::nothrow_t" define i8* @operator_new_nothrow_t() { +; CHECK-LABEL: @operator_new_nothrow_t( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[CMP]], label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]] +; CHECK: cast.notnull: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4 +; CHECK-NEXT: br label [[CAST_END]] +; CHECK: cast.end: +; CHECK-NEXT: [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i8* [[CAST_RESULT]] +; entry: %call = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow) %cmp = icmp eq i8* %call, null @@ -164,13 +335,23 @@ cast.end: ; preds = %cast.notnull, %entr %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ] ret i8* %cast.result -; CHECK-LABEL: @operator_new_nothrow_t -; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull } declare i8* @_ZnamRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) nounwind define i8* @malloc_can_return_null() { +; 
CHECK-LABEL: @malloc_can_return_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 8) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: br i1 [[CMP]], label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]] +; CHECK: cast.notnull: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4 +; CHECK-NEXT: br label [[CAST_END]] +; CHECK: cast.end: +; CHECK-NEXT: [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i8* [[CAST_RESULT]] +; entry: %call = tail call noalias i8* @malloc(i64 8) %cmp = icmp eq i8* %call, null @@ -184,38 +365,44 @@ cast.end: ; preds = %cast.notnull, %entr %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ] ret i8* %cast.result -; CHECK-LABEL: @malloc_can_return_null -; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull } define i32 @call_null() { +; CHECK-LABEL: @call_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 null() +; CHECK-NEXT: ret i32 undef +; entry: %call = call i32 null() ret i32 %call } -; CHECK-LABEL: define i32 @call_null( -; CHECK: ret i32 undef define i32 @call_undef() { +; CHECK-LABEL: @call_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 undef() +; CHECK-NEXT: ret i32 undef +; entry: %call = call i32 undef() ret i32 %call } -; CHECK-LABEL: define i32 @call_undef( -; CHECK: ret i32 undef @GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49] define <8 x i32> @partial_masked_load() { ; CHECK-LABEL: @partial_masked_load( -; CHECK: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47> +; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47> +; %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -2) to <8 x i32>*), i32 4, <8 x i1> <i1 
false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) ret <8 x i32> %masked.load } define <8 x i32> @masked_load_undef_mask(<8 x i32>* %V) { ; CHECK-LABEL: @masked_load_undef_mask( -; CHECK: ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0> +; %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %V, i32 4, <8 x i1> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>) ret <8 x i32> %masked.load } diff --git a/test/Transforms/InstSimplify/or.ll b/test/Transforms/InstSimplify/or.ll index 2c5b6181bc6c..14b08af00646 100644 --- a/test/Transforms/InstSimplify/or.ll +++ b/test/Transforms/InstSimplify/or.ll @@ -159,7 +159,7 @@ define i399 @test4_apint(i399 %V, i399 %M) { %A = add i399 %V, %N %B = and i399 %A, %C1 %D = and i399 %V, 274877906943 - %R = or i399 %B, %D + %R = or i399 %D, %B ret i399 %R } @@ -179,3 +179,42 @@ define i117 @test6_apint(i117 %X) { ret i117 %Y } +; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0. +; Vector version of test1_apint with the add commuted +define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) { +; CHECK-LABEL: @test7_apint( +; CHECK-NEXT: [[N:%.*]] = and <2 x i39> [[M:%.*]], <i39 -274877906944, i39 -274877906944> +; CHECK-NEXT: [[A:%.*]] = add <2 x i39> [[N]], [[V:%.*]] +; CHECK-NEXT: ret <2 x i39> [[A]] +; + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor <2 x i39> <i39 274877906943, i39 274877906943>, <i39 -1, i39 -1> ;; C2 = 274877906943 + %N = and <2 x i39> %M, <i39 274877906944, i39 274877906944> + %A = add <2 x i39> %N, %V + %B = and <2 x i39> %A, %C1 + %D = and <2 x i39> %V, <i39 274877906943, i39 274877906943> + %R = or <2 x i39> %B, %D + ret <2 x i39> %R +} + +; Test the case where Integer BitWidth > 64 && BitWidth <= 1024. 
+; Vector version of test4_apint with the add and the or commuted +define <2 x i399> @test8_apint(<2 x i399> %V, <2 x i399> %M) { +; CHECK-LABEL: @test8_apint( +; CHECK-NEXT: [[N:%.*]] = and <2 x i399> [[M:%.*]], <i399 18446742974197923840, i399 18446742974197923840> +; CHECK-NEXT: [[A:%.*]] = add <2 x i399> [[N]], [[V:%.*]] +; CHECK-NEXT: ret <2 x i399> [[A]] +; + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor <2 x i399> <i399 274877906943, i399 274877906943>, <i399 -1, i399 -1> ;; C2 = 274877906943 + %N = and <2 x i399> %M, <i399 18446742974197923840, i399 18446742974197923840> + %A = add <2 x i399> %N, %V + %B = and <2 x i399> %A, %C1 + %D = and <2 x i399> %V, <i399 274877906943, i399 274877906943> + %R = or <2 x i399> %D, %B + ret <2 x i399> %R +} diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll index 3a039676e172..f58ee299cba0 100644 --- a/test/Transforms/JumpThreading/assume.ll +++ b/test/Transforms/JumpThreading/assume.ll @@ -59,12 +59,12 @@ return: ; preds = %entry, %if.then @g = external global i32 ; Check that we do prove a fact using an assume within the block. -; FIXME: We can fold the assume based on the semantics of assume. -; CHECK-LABEL: @can_fold_assume -; CHECK: %notnull = icmp ne i32* %array, null -; CHECK-NEXT: call void @llvm.assume(i1 %notnull) -; CHECK-NEXT: ret void +; We can fold the assume based on the semantics of assume. define void @can_fold_assume(i32* %array) { +; CHECK-LABEL: @can_fold_assume +; CHECK-NOT: call void @llvm.assume +; CHECK-NOT: br +; CHECK: ret void %notnull = icmp ne i32* %array, null call void @llvm.assume(i1 %notnull) br i1 %notnull, label %normal, label %error @@ -80,19 +80,128 @@ error: declare void @f(i1) declare void @exit() ; We can fold the assume but not the uses before the assume. 
-define void @dont_fold_incorrectly(i32* %array) { -; CHECK-LABEL:@dont_fold_incorrectly +define void @cannot_fold_use_before_assume(i32* %array) { +; CHECK-LABEL:@cannot_fold_use_before_assume ; CHECK: @f(i1 %notnull) ; CHECK-NEXT: exit() -; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NOT: assume +; CHECK-NEXT: ret void + %notnull = icmp ne i32* %array, null + call void @f(i1 %notnull) + call void @exit() + call void @llvm.assume(i1 %notnull) + br i1 %notnull, label %normal, label %error + +normal: + ret void + +error: + store atomic i32 0, i32* @g unordered, align 4 + ret void +} + +declare void @dummy(i1) nounwind argmemonly +define void @can_fold_some_use_before_assume(i32* %array) { + +; CHECK-LABEL:@can_fold_some_use_before_assume +; CHECK: @f(i1 %notnull) +; CHECK-NEXT: @dummy(i1 true) +; CHECK-NOT: assume ; CHECK-NEXT: ret void %notnull = icmp ne i32* %array, null call void @f(i1 %notnull) + call void @dummy(i1 %notnull) + call void @llvm.assume(i1 %notnull) + br i1 %notnull, label %normal, label %error + +normal: + ret void + +error: + store atomic i32 0, i32* @g unordered, align 4 + ret void + +} + +; FIXME: can fold assume and all uses before/after assume. +; because the trapping exit call is after the assume. 
+define void @can_fold_assume_and_all_uses(i32* %array) { +; CHECK-LABEL:@can_fold_assume_and_all_uses +; CHECK: @dummy(i1 %notnull) +; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NEXT: exit() +; CHECK-NEXT: %notnull2 = or i1 true, false +; CHECK-NEXT: @f(i1 %notnull2) +; CHECK-NEXT: ret void + %notnull = icmp ne i32* %array, null + call void @dummy(i1 %notnull) + call void @llvm.assume(i1 %notnull) call void @exit() + br i1 %notnull, label %normal, label %error + +normal: + %notnull2 = or i1 %notnull, false + call void @f(i1 %notnull2) + ret void + +error: + store atomic i32 0, i32* @g unordered, align 4 + ret void +} + +declare void @fz(i8) +; FIXME: We can fold assume to true, and the use after assume, but we do not do so +; currently, because of the function call after the assume. +define void @can_fold_assume2(i32* %array) { + +; CHECK-LABEL:@can_fold_assume2 +; CHECK: @f(i1 %notnull) +; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NEXT: znotnull = zext i1 %notnull to i8 +; CHECK-NEXT: @f(i1 %notnull) +; CHECK-NEXT: @f(i1 true) +; CHECK-NEXT: @fz(i8 %znotnull) +; CHECK-NEXT: ret void + %notnull = icmp ne i32* %array, null + call void @f(i1 %notnull) + call void @llvm.assume(i1 %notnull) + %znotnull = zext i1 %notnull to i8 + call void @f(i1 %notnull) + br i1 %notnull, label %normal, label %error + +normal: + call void @f(i1 %notnull) + call void @fz(i8 %znotnull) + ret void + +error: + store atomic i32 0, i32* @g unordered, align 4 + ret void +} + +declare void @llvm.experimental.guard(i1, ...) +; FIXME: We can fold assume to true, but we do not do so +; because of the guard following the assume. 
+define void @can_fold_assume3(i32* %array){ + +; CHECK-LABEL:@can_fold_assume3 +; CHECK: @f(i1 %notnull) +; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NEXT: guard(i1 %notnull) +; CHECK-NEXT: znotnull = zext i1 true to i8 +; CHECK-NEXT: @f(i1 true) +; CHECK-NEXT: @fz(i8 %znotnull) +; CHECK-NEXT: ret void + %notnull = icmp ne i32* %array, null + call void @f(i1 %notnull) call void @llvm.assume(i1 %notnull) + call void(i1, ...) @llvm.experimental.guard(i1 %notnull) [ "deopt"() ] + %znotnull = zext i1 %notnull to i8 br i1 %notnull, label %normal, label %error normal: + call void @f(i1 %notnull) + call void @fz(i8 %znotnull) ret void error: @@ -100,6 +209,26 @@ error: ret void } + +; can fold all uses and remove the cond +define void @can_fold_assume4(i32* %array) { +; CHECK-LABEL: can_fold_assume4 +; CHECK-NOT: notnull +; CHECK: dummy(i1 true) +; CHECK-NEXT: ret void + %notnull = icmp ne i32* %array, null + call void @exit() + call void @dummy(i1 %notnull) + call void @llvm.assume(i1 %notnull) + br i1 %notnull, label %normal, label %error + +normal: + ret void + +error: + store atomic i32 0, i32* @g unordered, align 4 + ret void +} ; Function Attrs: nounwind declare void @llvm.assume(i1) #1 diff --git a/test/Transforms/JumpThreading/fold-not-thread.ll b/test/Transforms/JumpThreading/fold-not-thread.ll index f05169b31bc8..85cdcc0d9b33 100644 --- a/test/Transforms/JumpThreading/fold-not-thread.ll +++ b/test/Transforms/JumpThreading/fold-not-thread.ll @@ -133,10 +133,10 @@ L3: ret void } -; FIXME: Make sure we can do the RAUW for %add... +; Make sure we can do the RAUW for %add... 
; ; CHECK-LABEL: @rauw_if_possible( -; CHECK: call void @f4(i32 %add) +; CHECK: call void @f4(i32 96) define void @rauw_if_possible(i32 %value) nounwind { entry: %cmp = icmp eq i32 %value, 32 diff --git a/test/Transforms/JumpThreading/guards.ll b/test/Transforms/JumpThreading/guards.ll index c5f72b113efc..53175a7b7253 100644 --- a/test/Transforms/JumpThreading/guards.ll +++ b/test/Transforms/JumpThreading/guards.ll @@ -182,86 +182,89 @@ Exit: ret void } -declare void @never_called() +declare void @never_called(i1) -; Assume the guard is always taken and we deoptimize, so we never reach the -; branch below that guard. We should *never* change the behaviour of a guard from -; `must deoptimize` to `may deoptimize`, since this affects the program -; semantics. +; LVI uses guard to identify value of %c2 in branch as true, we cannot replace that +; guard with guard(true & c1). define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) { ; CHECK-LABEL: dont_fold_guard -; CHECK: experimental.guard(i1 %wide.chk) - -entry: - br label %BBPred +; CHECK: %wide.chk = and i1 %c1, %c2 +; CHECK-NEXT: experimental.guard(i1 %wide.chk) +; CHECK-NEXT: call void @never_called(i1 true) +; CHECK-NEXT: ret void + %c1 = icmp ult i32 %i, %length + %c2 = icmp eq i32 %i, 0 + %wide.chk = and i1 %c1, %c2 + call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] + br i1 %c2, label %BB1, label %BB2 -BBPred: - %cond = icmp eq i8* %addr, null - br i1 %cond, label %zero, label %not_zero +BB1: + call void @never_called(i1 %c2) + ret void -zero: - unreachable +BB2: + ret void +} -not_zero: +declare void @dummy(i1) nounwind argmemonly +; same as dont_fold_guard1 but there's a use immediately after guard and before +; branch. We can fold that use. 
+define void @dont_fold_guard2(i8* %addr, i32 %i, i32 %length) { +; CHECK-LABEL: dont_fold_guard2 +; CHECK: %wide.chk = and i1 %c1, %c2 +; CHECK-NEXT: experimental.guard(i1 %wide.chk) +; CHECK-NEXT: dummy(i1 true) +; CHECK-NEXT: call void @never_called(i1 true) +; CHECK-NEXT: ret void %c1 = icmp ult i32 %i, %length %c2 = icmp eq i32 %i, 0 %wide.chk = and i1 %c1, %c2 call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] - br i1 %c2, label %unreachedBB2, label %unreachedBB1 + call void @dummy(i1 %c2) + br i1 %c2, label %BB1, label %BB2 -unreachedBB2: - call void @never_called() +BB1: + call void @never_called(i1 %c2) ret void -unreachedBB1: +BB2: ret void } - ; same as dont_fold_guard1 but condition %cmp is not an instruction. ; We cannot fold the guard under any circumstance. ; FIXME: We can merge unreachableBB2 into not_zero. -define void @dont_fold_guard2(i8* %addr, i1 %cmp, i32 %i, i32 %length) { -; CHECK-LABEL: dont_fold_guard2 +define void @dont_fold_guard3(i8* %addr, i1 %cmp, i32 %i, i32 %length) { +; CHECK-LABEL: dont_fold_guard3 ; CHECK: guard(i1 %cmp) - -entry: - br label %BBPred - -BBPred: - %cond = icmp eq i8* %addr, null - br i1 %cond, label %zero, label %not_zero - -zero: - unreachable - -not_zero: call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ] - br i1 %cmp, label %unreachedBB2, label %unreachedBB1 + br i1 %cmp, label %BB1, label %BB2 -unreachedBB2: - call void @never_called() +BB1: + call void @never_called(i1 %cmp) ret void -unreachedBB1: +BB2: ret void } +declare void @f(i1) ; Same as dont_fold_guard1 but use switch instead of branch. ; triggers source code `ProcessThreadableEdges`. 
-declare void @f(i1) -define void @dont_fold_guard3(i1 %cmp1, i32 %i) nounwind { -; CHECK-LABEL: dont_fold_guard3 +define void @dont_fold_guard4(i1 %cmp1, i32 %i) nounwind { +; CHECK-LABEL: dont_fold_guard4 ; CHECK-LABEL: L2: ; CHECK-NEXT: %cmp = icmp eq i32 %i, 0 ; CHECK-NEXT: guard(i1 %cmp) -; CHECK-NEXT: @f(i1 %cmp) +; CHECK-NEXT: dummy(i1 true) +; CHECK-NEXT: @f(i1 true) ; CHECK-NEXT: ret void entry: br i1 %cmp1, label %L0, label %L3 L0: %cmp = icmp eq i32 %i, 0 call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ] + call void @dummy(i1 %cmp) switch i1 %cmp, label %L3 [ i1 false, label %L1 i1 true, label %L2 diff --git a/test/Transforms/LoopIdiom/pr33114.ll b/test/Transforms/LoopIdiom/pr33114.ll new file mode 100644 index 000000000000..fa44d8e31e7c --- /dev/null +++ b/test/Transforms/LoopIdiom/pr33114.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Check that we're not crashing while looking at the recurrence variable. +; RUN: opt -S -loop-idiom %s | FileCheck %s + +define void @tinkywinky() { +; CHECK-LABEL: @tinkywinky( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[PH:%.*]] +; CHECK: ph: +; CHECK-NEXT: [[MYPHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[PATATINO:%.*]] = ashr i32 [[MYPHI]], undef +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[PATATINO]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[EXIT_LOOPEXIT:%.*]], label [[IF_END]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br i1 true, label %exit, label %ph + +ph: + %myphi = phi i32 [ 1, %entry ] + br label %if.end + +if.end: + %patatino = ashr i32 %myphi, undef + %tobool = icmp eq i32 %patatino, 0 + br i1 %tobool, label %exit, label %if.end + +exit: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll 
b/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll index 3adb8bcf514d..00c3222b0051 100644 --- a/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll +++ b/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll @@ -25,7 +25,7 @@ L2: ; preds = %idxend.8 if6: ; preds = %idxend.8 %r2 = add i64 %0, -1 %r3 = load i64, i64* %1, align 8 -; CHECK-NOT: %r2 +; CHECK: %r2 = add i64 %0, -1 ; CHECK: %r3 = load i64 br label %ib @@ -36,13 +36,11 @@ ib: ; preds = %if6 %r4 = mul i64 %r3, %r0 %r5 = add i64 %r2, %r4 %r6 = icmp ult i64 %r5, undef -; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3 -; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1 -; CHECK: add i64 %{{.}}, [[ADD1]] -; CHECK: %r6 +; CHECK: %r4 = mul i64 %r3, %lsr.iv +; CHECK: %r5 = add i64 %r2, %r4 +; CHECK: %r6 = icmp ult i64 %r5, undef +; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5 %r7 = getelementptr i64, i64* undef, i64 %r5 store i64 1, i64* %r7, align 8 -; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3 -; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1 br label %L } diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll index aa688d999e60..a7731bfcec56 100644 --- a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll +++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll @@ -1,5 +1,14 @@ +; REQUIRES: x86 ; RUN: opt -loop-reduce -S < %s | FileCheck %s +; Strength reduction analysis here relies on IV Users analysis, that +; only finds users among instructions with types that are treated as +; legal by the data layout. When running this test on pure non-x86 +; configs (for example, ARM 64), it gets confused with the target +; triple and uses a default data layout instead. This default layout +; does not have any legal types (even i32), so the transformation +; does not happen. 
+ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx" @@ -7,16 +16,23 @@ target triple = "x86_64-apple-macosx" ; ; SCEV expander cannot expand quadratic recurrences outside of the ; loop. This recurrence depends on %sub.us, so can't be expanded. +; We cannot fold SCEVUnknown (sub.us) with recurrences since it is +; declared after the loop. ; ; CHECK-LABEL: @test2 ; CHECK-LABEL: test2.loop: -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] -; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 +; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ] +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ] +; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1 +; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216 ; ; CHECK-LABEL: for.end: -; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us -; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us -; CHECK: %f = ashr i32 %sext.us, 24 +; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0 +; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0 +; CHECK: %1 = sub i32 0, %sub.us +; CHECK: %2 = add i32 %1, %lsr.iv.next +; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2 +; CHECK: %f = ashr i32 %sext.us, 24 ; CHECK: ret i32 %f define i32 @test2() { entry: diff --git a/test/Transforms/LoopStrengthReduce/nonintegral.ll b/test/Transforms/LoopStrengthReduce/nonintegral.ll new file mode 100644 index 000000000000..5648e3aa74af --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/nonintegral.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s + +; Address Space 10 is non-integral. The optimizer is not allowed to use +; ptrtoint/inttoptr instructions. 
Make sure that this doesn't happen +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12" +target triple = "x86_64-unknown-linux-gnu" + +define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) { +; CHECK-NOT: inttoptr +; CHECK-NOT: ptrtoint +; How exactly SCEV chooses to materialize isn't all that important, as +; long as it doesn't try to round-trip through integers. As of this writing, +; it emits a byte-wise gep, which is fine. +; CHECK: getelementptr i64, i64 addrspace(10)* {{.*}}, i64 {{.*}} +top: + br label %L86 + +L86: ; preds = %L86, %top + %i.0 = phi i64 [ 0, %top ], [ %tmp, %L86 ] + %tmp = add i64 %i.0, 1 + br i1 undef, label %L86, label %if29 + +if29: ; preds = %L86 + %tmp1 = shl i64 %tmp, 1 + %tmp2 = add i64 %tmp1, -2 + br label %if31 + +if31: ; preds = %if38, %if29 + %"#temp#1.sroa.0.022" = phi i64 [ 0, %if29 ], [ %tmp3, %if38 ] + br label %L119 + +L119: ; preds = %L119, %if31 + %i5.0 = phi i64 [ %"#temp#1.sroa.0.022", %if31 ], [ %tmp3, %L119 ] + %tmp3 = add i64 %i5.0, 1 + br i1 undef, label %L119, label %if38 + +if38: ; preds = %L119 + %tmp4 = add i64 %tmp2, %i5.0 + %tmp5 = getelementptr i64, i64 addrspace(10)* %arg, i64 %tmp4 + %tmp6 = load i64, i64 addrspace(10)* %tmp5 + br i1 undef, label %done, label %if31 + +done: ; preds = %if38 + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll index fbf55fd81d23..cbf177c0d4b9 100644 --- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -25,6 +25,8 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2* entry: %buffer = alloca [33 x i16], align 16 %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33 + %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64 br label %do.body do.body: ; preds = %do.body, %entry @@ -46,8 
+48,6 @@ do.body: ; preds = %do.body, %entry do.end: ; preds = %do.body %xap.0 = inttoptr i64 %0 to i1* %cap.0 = ptrtoint i1* %xap.0 to i64 - %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64 - %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1 %conv11 = trunc i64 %sub.ptr.div39 to i32 diff --git a/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll new file mode 100644 index 000000000000..a7f414b8694b --- /dev/null +++ b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -0,0 +1,26 @@ +; REQUIRES: asserts +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; CHECK-LABEL: all_scalar +; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2 +; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 +; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions +; +define void @all_scalar(i64* %a, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %tmp0 = getelementptr i64, i64* %a, i64 %i + store i64 0, i64* %tmp0, align 1 + %i.next = add nuw nsw i64 %i, 2 + %cond = icmp eq i64 %i.next, %n + br i1 %cond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/test/Transforms/LoopVectorize/SystemZ/addressing.ll new file mode 100644 index 000000000000..1f7a6d29c57c --- /dev/null +++ b/test/Transforms/LoopVectorize/SystemZ/addressing.ll @@ -0,0 +1,72 @@ +; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \ +; RUN: -instcombine -force-vector-width=2 < %s 
| FileCheck %s +; +; Test that loop vectorizer does not generate vector addresses that must then +; always be extracted. + +; Check that the addresses for a scalarized memory access are not extracted +; from a vector register. +define i32 @foo(i32* nocapture %A) { +;CHECK-LABEL: @foo( +;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +;CHECK: %0 = shl nsw i64 %index, 2 +;CHECK: %1 = shl i64 %index, 2 +;CHECK: %2 = or i64 %1, 4 +;CHECK: %3 = getelementptr inbounds i32, i32* %A, i64 %0 +;CHECK: %4 = getelementptr inbounds i32, i32* %A, i64 %2 +;CHECK: store i32 4, i32* %3, align 4 +;CHECK: store i32 4, i32* %4, align 4 + +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0 + store i32 4, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} + + +; Check that a load of an address is scalarized. 
+define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) { +;CHECK-LABEL: @foo1( +;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +;CHECK: %0 = or i64 %index, 1 +;CHECK: %1 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %index +;CHECK: %2 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %0 +;CHECK: %3 = load i32*, i32** %1, align 8 +;CHECK: %4 = load i32*, i32** %2, align 8 +;CHECK: %5 = load i32, i32* %3, align 4 +;CHECK: %6 = load i32, i32* %4, align 4 +;CHECK: %7 = insertelement <2 x i32> undef, i32 %5, i32 0 +;CHECK: %8 = insertelement <2 x i32> %7, i32 %6, i32 1 +;CHECK: %9 = getelementptr inbounds i32, i32* %A, i64 %index +;CHECK: %10 = bitcast i32* %9 to <2 x i32>* +;CHECK: store <2 x i32> %8, <2 x i32>* %10, align 4 + +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv + %el = load i32*, i32** %ptr + %v = load i32, i32* %el + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %v, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll index b2933c4b56f2..4dc62d86453f 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -11,38 +11,38 @@ ; break; ; } ; } +; File, line, and column should match those specified in the metadata +; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations +; CHECK: remark: source.cpp:4:5: loop not vectorized ; void test_disabled(int *A, int Length) { ; #pragma clang 
loop vectorize(disable) interleave(disable) ; for (int i = 0; i < Length; i++) ; A[i] = i; ; } +; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1 ; void test_array_bounds(int *A, int *B, int Length) { ; #pragma clang loop vectorize(enable) ; for (int i = 0; i < Length; i++) ; A[i] = A[B[i]]; ; } - -; File, line, and column should match those specified in the metadata -; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations -; CHECK: remark: source.cpp:4:5: loop not vectorized -; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds ; CHECK: remark: source.cpp:19:5: loop not vectorized ; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization -; CHECK: _Z4testPii -; CHECK-NOT: x i32> -; CHECK: ret - -; CHECK: _Z13test_disabledPii -; CHECK-NOT: x i32> -; CHECK: ret - -; CHECK: _Z17test_array_boundsPiS_i -; CHECK-NOT: x i32> -; CHECK: ret +; int foo(); +; int test_multiple_failures(int *A) { +; int k = 0; +; #pragma clang loop vectorize(enable) interleave(enable) +; for (int i = 0; i < 1000; i+=A[i]) { +; if (A[i]) +; k = foo(); +; } +; return k; +; } +; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select +; CHECK: remark: source.cpp:27:3: loop not vectorized ; YAML: --- !Analysis ; YAML-NEXT: Pass: loop-vectorize @@ -98,6 +98,41 @@ ; YAML-NEXT: - String: 'loop not vectorized: ' ; YAML-NEXT: - String: failed explicitly specified loop vectorization ; YAML-NEXT: ... 
+; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: NoCFGForSelect +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 7 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: control flow cannot be substituted for a select +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: CantComputeNumberOfIterations +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: could not determine number of loop iterations +; YAML-NEXT: ... +; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 } +; YAML-NEXT: Function: test_multiple_failures +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... 
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -124,6 +159,10 @@ for.end: ; preds = %for.body, %entry ret void, !dbg !24 } +; CHECK: _Z4testPii +; CHECK-NOT: x i32> +; CHECK: ret + ; Function Attrs: nounwind optsize ssp uwtable define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 { entry: @@ -144,6 +183,10 @@ for.end: ; preds = %for.body, %entry ret void, !dbg !31 } +; CHECK: _Z13test_disabledPii +; CHECK-NOT: x i32> +; CHECK: ret + ; Function Attrs: nounwind optsize ssp uwtable define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 { entry: @@ -174,6 +217,45 @@ for.end: ; preds = %for.end.loopexit, % ret void, !dbg !36 } +; CHECK: _Z17test_array_boundsPiS_i +; CHECK-NOT: x i32> +; CHECK: ret + +; Function Attrs: nounwind uwtable +define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 { +entry: + br label %for.body, !dbg !38 + +for.body: ; preds = %entry, %for.inc + %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40 + %0 = load i32, i32* %arrayidx, align 4, !dbg !40 + %tobool = icmp eq i32 %0, 0, !dbg !40 + br i1 %tobool, label %for.inc, label %if.then, !dbg !40 + +if.then: ; preds = %for.body + %call = tail call i32 (...) @foo(), !dbg !41 + %.pre = load i32, i32* %arrayidx, align 4 + br label %for.inc, !dbg !42 + +for.inc: ; preds = %for.body, %if.then + %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43 + %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ] + %add = add nsw i32 %1, %i.09, !dbg !44 + %cmp = icmp slt i32 %add, 1000, !dbg !45 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38 + +for.cond.cleanup: ; preds = %for.inc + ret i32 %k.1, !dbg !39 +} + +declare i32 @foo(...) 
+ +; CHECK: test_multiple_failure +; CHECK-NOT: x i32> +; CHECK: ret + attributes #0 = { nounwind } !llvm.dbg.cu = !{!0} @@ -216,3 +298,13 @@ attributes #0 = { nounwind } !34 = !{!34, !15} !35 = !DILocation(line: 19, column: 5, scope: !33) !36 = !DILocation(line: 20, column: 1, scope: !8) +!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46) +!38 = !DILocation(line: 27, column: 3, scope: !37) +!39 = !DILocation(line: 31, column: 3, scope: !37) +!40 = !DILocation(line: 28, column: 9, scope: !37) +!41 = !DILocation(line: 29, column: 11, scope: !37) +!42 = !DILocation(line: 29, column: 7, scope: !37) +!43 = !DILocation(line: 27, column: 32, scope: !37) +!44 = !DILocation(line: 27, column: 30, scope: !37) +!45 = !DILocation(line: 27, column: 21, scope: !37) +!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2) diff --git a/test/Transforms/NewGVN/pr32403.ll b/test/Transforms/NewGVN/pr32403.ll index 2552e0e66ab9..505d31a9463e 100644 --- a/test/Transforms/NewGVN/pr32403.ll +++ b/test/Transforms/NewGVN/pr32403.ll @@ -17,7 +17,8 @@ define void @reorder_ref_pic_list() local_unnamed_addr { ; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[REFIDXLX_0]], 1 ; CHECK-NEXT: br label [[FOR_BODY8_I:%.*]] ; CHECK: for.body8.i: -; CHECK-NEXT: br i1 undef, label [[FOR_INC24_I:%.*]], label [[IF_THEN17_I:%.*]] +; CHECK-NEXT: [[NIDX_052_I:%.*]] = phi i32 [ [[INC_I]], [[IF_THEN13]] ], [ [[NIDX_052_I]], [[FOR_INC24_I:%.*]] ] +; CHECK-NEXT: br i1 undef, label [[FOR_INC24_I]], label [[IF_THEN17_I:%.*]] ; CHECK: if.then17.i: ; CHECK-NEXT: br label [[FOR_INC24_I]] ; CHECK: for.inc24.i: diff --git a/test/Transforms/NewGVN/pr32836.ll b/test/Transforms/NewGVN/pr32836.ll new file mode 100644 index 000000000000..623f216101bf --- /dev/null +++ b/test/Transforms/NewGVN/pr32836.ll @@ -0,0 +1,45 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -newgvn %s | FileCheck %s + +%struct.anon = type { i32 } +@b = external global %struct.anon +define void @tinkywinky(i1 %patatino) { +; CHECK-LABEL: @tinkywinky( +; CHECK-NEXT: store i32 8, i32* null +; CHECK-NEXT: br i1 [[PATATINO:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[L:%.*]] +; CHECK: L: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* null +; CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0) +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i32 [[TMP1]], 536870911 +; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -536870912 +; CHECK-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +; CHECK-NEXT: store i32 [[BF_SET]], i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0) +; CHECK-NEXT: br label [[LOR_END:%.*]] +; CHECK: lor.end: +; CHECK-NEXT: br label [[L]] +; + store i32 8, i32* null + br i1 %patatino, label %if.end, label %if.then +if.then: + store i32 8, i32* null + br label %L +L: + br label %if.end +if.end: + %tmp1 = load i32, i32* null + %bf.load1 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0) + %bf.value = and i32 %tmp1, 536870911 + %bf.clear = and i32 %bf.load1, -536870912 + %bf.set = or i32 %bf.clear, %bf.value + store i32 %bf.set, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0) + br label %lor.end +lor.end: + %bf.load4 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0) + %tmp4 = and i32 %bf.load4, 536870911 + %or = or i32 0, %tmp4 + br label %L +} diff --git a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll index 3ac3c5138ae7..a97e3f81a8ef 100644 --- a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll +++ 
b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll @@ -382,3 +382,64 @@ loop_exit2: ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]] ; CHECK-NEXT: ret i32 %[[R]] } + +; This test, extracted from the LLVM test suite, has an interesting dominator +; tree to update as there are edges to sibling domtree nodes within child +; domtree nodes of the unswitched node. +define void @xgets(i1 %cond1, i1* %cond2.ptr) { +; CHECK-LABEL: @xgets( +entry: + br label %for.cond.preheader +; CHECK: entry: +; CHECK-NEXT: br label %for.cond.preheader + +for.cond.preheader: + br label %for.cond +; CHECK: for.cond.preheader: +; CHECK-NEXT: br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit +; +; CHECK: for.cond.preheader.split: +; CHECK-NEXT: br label %for.cond + +for.cond: + br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit +; CHECK: for.cond: +; CHECK-NEXT: br label %land.lhs.true + +land.lhs.true: + br label %if.then20 +; CHECK: land.lhs.true: +; CHECK-NEXT: br label %if.then20 + +if.then20: + %cond2 = load volatile i1, i1* %cond2.ptr + br i1 %cond2, label %if.then23, label %if.else +; CHECK: if.then20: +; CHECK-NEXT: %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr +; CHECK-NEXT: br i1 %[[COND2]], label %if.then23, label %if.else + +if.else: + br label %for.cond +; CHECK: if.else: +; CHECK-NEXT: br label %for.cond + +if.end17.thread.loopexit: + br label %if.end17.thread +; CHECK: if.end17.thread.loopexit: +; CHECK-NEXT: br label %if.end17.thread + +if.end17.thread: + br label %cleanup +; CHECK: if.end17.thread: +; CHECK-NEXT: br label %cleanup + +if.then23: + br label %cleanup +; CHECK: if.then23: +; CHECK-NEXT: br label %cleanup + +cleanup: + ret void +; CHECK: cleanup: +; CHECK-NEXT: ret void +} |