diff options
Diffstat (limited to 'test/Transforms')
49 files changed, 2166 insertions, 424 deletions
diff --git a/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll new file mode 100644 index 000000000000..7ce8ab3ac521 --- /dev/null +++ b/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -0,0 +1,38 @@ +; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s + +; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to +; dbg.value which still used the removed argument. + +%p_t = type i16* +%fun_t = type void (%p_t)* + +define void @foo() { + %tmp = alloca %fun_t + store %fun_t @bar, %fun_t* %tmp + ret void +} + +define internal void @bar(%p_t %p) { + call void @llvm.dbg.value(metadata %p_t %p, i64 0, metadata !4, metadata !5), !dbg !6 + ret void +} + +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "bar", unit: !0) +!4 = !DILocalVariable(name: "p", scope: !3) +!5 = !DIExpression() +!6 = !DILocation(line: 1, column: 1, scope: !3) + +; The %p argument should be removed, and the use of it in dbg.value should be +; changed to undef. +; CHECK: define internal void @bar() { +; CHECK-NEXT: call void @llvm.dbg.value(metadata i16* undef +; CHECK-NEXT: ret void +; CHECK-NEXT: } diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll index 2435cd7d0a83..4b9e7c3956f5 100644 --- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll +++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32 ; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 @@ -5,8 +6,8 @@ declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp2( -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16* -; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16* +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16* ; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]] ; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]] ; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) @@ -23,7 +24,7 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp3( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 3) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3) @@ -32,8 +33,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp4( -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32* -; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32* +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* ; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] ; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] ; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) @@ -50,7 +51,7 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp5( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 5) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5) @@ -59,7 +60,7 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp6( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 6) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6) @@ -68,7 +69,7 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp7( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7) @@ -77,12 +78,12 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp8( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8) +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 8) ; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp8( -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64* -; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64* +; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* ; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] ; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] ; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) @@ -99,7 +100,7 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp9( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9) @@ -108,7 +109,7 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp10( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10) @@ -117,7 +118,7 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp11( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) @@ -126,7 +127,7 @@ define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp12( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12) @@ -135,7 +136,7 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp13( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) @@ -144,7 +145,7 @@ define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp14( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) @@ -153,7 +154,7 @@ define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp15( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) @@ -162,7 +163,7 @@ define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp16( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16) ; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) @@ -171,8 +172,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq2( -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16* -; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16* +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16* ; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]] ; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]] ; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] @@ -189,7 +190,7 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq3( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 3) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -202,8 +203,8 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq4( -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32* -; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32* +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* ; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] ; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] ; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] @@ -220,7 +221,7 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq5( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 5) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -233,7 +234,7 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq6( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 6) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -246,7 +247,7 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq7( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -259,14 +260,14 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp_eq8( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8) +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 8) ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; ; X64-LABEL: @cmp_eq8( -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64* -; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64* +; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* ; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] ; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] ; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] @@ -283,7 +284,7 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq9( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -296,7 +297,7 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq10( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -309,7 +310,7 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq11( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -322,7 +323,7 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq12( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -335,7 +336,7 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq13( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -348,7 +349,7 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq14( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -361,7 +362,7 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq15( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] @@ -374,7 +375,7 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq16( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16) ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll index 9d6e668167fb..b6b775797826 100644 --- a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll +++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll @@ -4,6 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" +@x = external global [1 x [2 x <4 x float>]] + ; Can we sink single addressing mode computation to use? define void @test1(i1 %cond, i64* %base) { ; CHECK-LABEL: @test1 @@ -194,3 +196,25 @@ rare.2: declare void @slowpath(i32, i32*) + +; Make sure we don't end up in an infinite loop after we fail to sink. +; CHECK-LABEL: define void @test8 +; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef +define void @test8() { +allocas: + %aFOO_load = load float*, float** undef + %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64 + %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0 + %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8* + %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef + br label %load.i145 + +load.i145: + %ptr.i143 = bitcast i8* %ptr to <4 x float>* + %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4 + %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0 + br label %pl_loop.i.i122 + +pl_loop.i.i122: + br label %pl_loop.i.i122 +} diff --git a/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll b/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll new file mode 100644 index 000000000000..3808c0e61c10 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll @@ -0,0 +1,16 @@ +; RUN: opt -S -codegenprepare %s -o - | FileCheck %s +; +; Ensure that we don't {crash,return a bad value} when given an alloca larger +; than what a pointer can represent. + +target datalayout = "p:16:16" + +; CHECK-LABEL: @alloca_overflow_is_unknown( +define i16 @alloca_overflow_is_unknown() { + %i = alloca i8, i32 65537 + %j = call i16 @llvm.objectsize.i16.p0i8(i8* %i, i1 false, i1 false) + ; CHECK: ret i16 -1 + ret i16 %j +} + +declare i16 @llvm.objectsize.i16.p0i8(i8*, i1, i1) diff --git a/test/Transforms/ConstantHoisting/ARM/bad-cases.ll b/test/Transforms/ConstantHoisting/ARM/bad-cases.ll index ffcfb2e56c95..315e69998c62 100644 --- a/test/Transforms/ConstantHoisting/ARM/bad-cases.ll +++ b/test/Transforms/ConstantHoisting/ARM/bad-cases.ll @@ -107,3 +107,34 @@ entry: %ret = add i32 %cast0, %cast1 ret i32 %ret } + +@exception_type = external global i8 + +; Constants in inline ASM should not be hoisted. +define i32 @inline_asm_invoke() personality i8* null { +;CHECK-LABEL: @inline_asm_invoke +;CHECK-NOT: %const = 214672 +;CHECK: %X = invoke i32 asm "bswap $0", "=r,r"(i32 214672) + %X = invoke i32 asm "bswap $0", "=r,r"(i32 214672) + to label %L unwind label %lpad +;CHECK: %Y = invoke i32 asm "bswap $0", "=r,r"(i32 214672) + %Y = invoke i32 asm "bswap $0", "=r,r"(i32 214672) + to label %L unwind label %lpad +L: + ret i32 %X +lpad: + %lp = landingpad i32 + cleanup + catch i8* @exception_type + ret i32 1 +} + +define i32 @inline_asm_call() { +;CHECK-LABEL: @inline_asm_call +;CHECK-NOT: %const = 214672 +;CHECK: %X = call i32 asm "bswap $0", "=r,r"(i32 214672) + %X = call i32 asm "bswap $0", "=r,r"(i32 214672) +;CHECK: %Y = call i32 asm "bswap $0", "=r,r"(i32 214672) + %Y = call i32 asm "bswap $0", "=r,r"(i32 214672) + ret i32 %X +} diff --git a/test/Transforms/ConstantHoisting/ARM/insertvalue.ll b/test/Transforms/ConstantHoisting/ARM/insertvalue.ll new file mode 100644 index 000000000000..99fe7fbe22a5 --- /dev/null +++ b/test/Transforms/ConstantHoisting/ARM/insertvalue.ll @@ -0,0 +1,31 @@ +; RUN: opt -consthoist -S < %s | FileCheck %s +target triple = "thumbv6m-none-eabi" + +%T = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, +i32, i32, i32, i32, i32, i32 } + +; The second operand of insertvalue is able to be hoisted. +define void @test1(%T %P) { +; CHECK-LABEL: @test1 +; CHECK: %const = bitcast i32 256 to i32 +; CHECK: %1 = insertvalue %T %P, i32 %const, 256 +; CHECK: %2 = insertvalue %T %P, i32 %const, 256 + %1 = insertvalue %T %P, i32 256, 256 + %2 = insertvalue %T %P, i32 256, 256 + ret void +} diff --git a/test/Transforms/ConstantHoisting/X86/ehpad.ll b/test/Transforms/ConstantHoisting/X86/ehpad.ll index 4f87572f3447..5e345c4515d7 100644 --- a/test/Transforms/ConstantHoisting/X86/ehpad.ll +++ b/test/Transforms/ConstantHoisting/X86/ehpad.ll @@ -1,9 +1,6 @@ -; RUN: opt -S -consthoist < %s | FileCheck %s +; RUN: opt -S -consthoist -consthoist-with-block-frequency=false < %s | FileCheck %s ; RUN: opt -S -consthoist -consthoist-with-block-frequency=true < %s | FileCheck --check-prefix=BFIHOIST %s -; FIXME: The catchpad doesn't even use the constant, so a better fix would be to -; insert the bitcast in the catchpad block. - target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc" diff --git a/test/Transforms/GVN/PRE/atomic.ll b/test/Transforms/GVN/PRE/atomic.ll index 509acd613e95..3479bc9a0e33 100644 --- a/test/Transforms/GVN/PRE/atomic.ll +++ b/test/Transforms/GVN/PRE/atomic.ll @@ -208,14 +208,14 @@ define void @fence_seq_cst(i32* %P1, i32* %P2) { ret void } -; Can't DSE across a full singlethread fence +; Can't DSE across a full syncscope("singlethread") fence define void @fence_seq_cst_st(i32* %P1, i32* %P2) { ; CHECK-LABEL: @fence_seq_cst_st( ; CHECK: store -; CHECK: fence singlethread seq_cst +; CHECK: fence syncscope("singlethread") seq_cst ; CHECK: store store i32 0, i32* %P1, align 4 - fence singlethread seq_cst + fence syncscope("singlethread") seq_cst store i32 0, i32* %P1, align 4 ret void } diff --git a/test/Transforms/GVN/PRE/phi-translate-2.ll b/test/Transforms/GVN/PRE/phi-translate-2.ll deleted file mode 100644 index 78681e20df5e..000000000000 --- a/test/Transforms/GVN/PRE/phi-translate-2.ll +++ /dev/null @@ -1,131 +0,0 @@ -; RUN: opt < %s -gvn -S | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -@a = common global [100 x i64] zeroinitializer, align 16 -@b = common global [100 x i64] zeroinitializer, align 16 -@g1 = common global i64 0, align 8 -@g2 = common global i64 0, align 8 -@g3 = common global i64 0, align 8 -declare i64 @goo(...) local_unnamed_addr #1 - -define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) { -entry: - %mul = mul nsw i64 %b, %a - store i64 %mul, i64* @g1, align 8 - %t0 = load i64, i64* @g2, align 8 - %cmp = icmp sgt i64 %t0, 3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %mul2 = mul nsw i64 %d, %c - store i64 %mul2, i64* @g2, align 8 - br label %if.end - -; Check phi-translate works and mul is removed. -; CHECK-LABEL: @test1( -; CHECK: if.end: -; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ] -; CHECK-NOT: = mul -; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8 -if.end: ; preds = %if.then, %entry - %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ] - %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ] - %mul3 = mul nsw i64 %a.addr.0, %b.addr.0 - store i64 %mul3, i64* @g3, align 8 - ret void -} - -define void @test2(i64 %i) { -entry: - %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i - %t0 = load i64, i64* %arrayidx, align 8 - %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i - %t1 = load i64, i64* %arrayidx1, align 8 - %mul = mul nsw i64 %t1, %t0 - store i64 %mul, i64* @g1, align 8 - %cmp = icmp sgt i64 %mul, 3 - br i1 %cmp, label %if.then, label %if.end - -; Check phi-translate works for the phi generated by loadpre. A new mul will be -; inserted in if.then block. -; CHECK-LABEL: @test2( -; CHECK: if.then: -; CHECK: %[[MUL_THEN:.*]] = mul -; CHECK: br label %if.end -if.then: ; preds = %entry - %call = tail call i64 (...) @goo() #2 - store i64 %call, i64* @g2, align 8 - br label %if.end - -; CHECK: if.end: -; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ] -; CHECK-NOT: = mul -; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8 -if.end: ; preds = %if.then, %entry - %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ] - %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0 - %t2 = load i64, i64* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0 - %t3 = load i64, i64* %arrayidx4, align 8 - %mul5 = mul nsw i64 %t3, %t2 - store i64 %mul5, i64* @g3, align 8 - ret void -} - -; Check phi-translate doesn't go through backedge, which may lead to incorrect -; pre transformation. -; CHECK: for.end: -; CHECK-NOT: %{{.*pre-phi}} = phi -; CHECK: ret void -define void @test3(i64 %N, i64* nocapture readonly %a) { -entry: - br label %for.cond - -for.cond: ; preds = %for.body, %entry - %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ] - %add = add nuw nsw i64 %i.0, 1 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add - %tmp0 = load i64, i64* %arrayidx, align 8 - %cmp = icmp slt i64 %i.0, %N - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call = tail call i64 (...) @goo() #2 - %add1 = sub nsw i64 0, %call - %tobool = icmp eq i64 %tmp0, %add1 - br i1 %tobool, label %for.cond, label %for.end - -for.end: ; preds = %for.body, %for.cond - %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ] - %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa - %tmp1 = load i64, i64* %arrayidx2, align 8 - store i64 %tmp1, i64* @g1, align 8 - ret void -} - -; It is incorrect to use the value of %andres in last loop iteration -; to do pre. -; CHECK-LABEL: @test4( -; CHECK: for.body: -; CHECK-NOT: %andres.pre-phi = phi i32 -; CHECK: br i1 %tobool1 - -define i32 @test4(i32 %cond, i32 %SectionAttrs.0231.ph, i32 *%AttrFlag) { -for.body.preheader: - %t514 = load volatile i32, i32* %AttrFlag - br label %for.body - -for.body: - %t320 = phi i32 [ %t334, %bb343 ], [ %t514, %for.body.preheader ] - %andres = and i32 %t320, %SectionAttrs.0231.ph - %tobool1 = icmp eq i32 %andres, 0 - br i1 %tobool1, label %bb343, label %critedge.loopexit - -bb343: - %t334 = load volatile i32, i32* %AttrFlag - %tobool2 = icmp eq i32 %cond, 0 - br i1 %tobool2, label %critedge.loopexit, label %for.body - -critedge.loopexit: - unreachable -} diff --git a/test/Transforms/GVN/PRE/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll index 1b2b4d20d31d..9eec8bb6455b 100644 --- a/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -37,7 +37,7 @@ sw.bb2: ; preds = %if.end, %entry %3 = load double, double* %arrayidx5, align 8 ; CHECK: sw.bb2: ; CHECK-NOT: sext -; CHECK: phi double [ +; CHECK-NEXT: phi double [ ; CHECK-NOT: load %sub6 = fsub double 3.000000e+00, %3 br label %return diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll index ffff2b7f08e5..685df24f62b6 100644 --- a/test/Transforms/GVN/PRE/pre-load.ll +++ b/test/Transforms/GVN/PRE/pre-load.ll @@ -72,7 +72,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK: phi i32 [ +; CHECK-NEXT: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } @@ -104,7 +104,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK: phi i32 [ +; CHECK-NEXT: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } @@ -263,7 +263,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK: phi i32 [ +; CHECK-NEXT: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } diff --git a/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/test/Transforms/IndVarSimplify/canonicalize-cmp.ll new file mode 100644 index 000000000000..2b939767284a --- /dev/null +++ b/test/Transforms/IndVarSimplify/canonicalize-cmp.ll @@ -0,0 +1,98 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +; Check that we replace signed comparisons between non-negative values with +; unsigned comparisons if we can. + +target datalayout = "n8:16:32:64" + +define i32 @test_01(i32 %a, i32 %b, i32* %p) { + +; CHECK-LABEL: @test_01( +; CHECK-NOT: icmp slt +; CHECK: %cmp1 = icmp ult i32 %iv, 100 +; CHECK: %cmp2 = icmp ult i32 %iv, 100 +; CHECK-NOT: %cmp3 +; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 + +entry: + br label %loop.entry + +loop.entry: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.be ] + %cmp1 = icmp slt i32 %iv, 100 + br i1 %cmp1, label %b1, label %b2 + +b1: + store i32 %iv, i32* %p + br label %merge + +b2: + store i32 %a, i32* %p + br label %merge + +merge: + %cmp2 = icmp ult i32 %iv, 100 + br i1 %cmp2, label %b3, label %b4 + +b3: + store i32 %iv, i32* %p + br label %loop.be + +b4: + store i32 %b, i32* %p + br label %loop.be + +loop.be: + %iv.next = add i32 %iv, 1 + %cmp3 = icmp slt i32 %iv.next, 1000 + br i1 %cmp3, label %loop.entry, label %exit + +exit: + ret i32 %iv +} + +define i32 @test_02(i32 %a, i32 %b, i32* %p) { + +; CHECK-LABEL: @test_02( +; CHECK-NOT: icmp sgt +; CHECK: %cmp1 = icmp ugt i32 100, %iv +; CHECK: %cmp2 = icmp ugt i32 100, %iv +; CHECK-NOT: %cmp3 +; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 + +entry: + br label %loop.entry + +loop.entry: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.be ] + %cmp1 = icmp sgt i32 100, %iv + br i1 %cmp1, label %b1, label %b2 + +b1: + store i32 %iv, i32* %p + br label %merge + +b2: + store i32 %a, i32* %p + br label %merge + +merge: + %cmp2 = icmp ugt i32 100, %iv + br i1 %cmp2, label %b3, label %b4 + +b3: + store i32 %iv, i32* %p + br label %loop.be + +b4: + store i32 %b, i32* %p + br label %loop.be + +loop.be: + %iv.next = add i32 %iv, 1 + %cmp3 = icmp sgt i32 1000, %iv.next + br i1 %cmp3, label %loop.entry, label %exit + +exit: + ret i32 %iv +} diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll index 612f01e3cade..a63617e62c0e 100644 --- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll +++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll @@ -111,7 +111,7 @@ return: ; Indvars should not turn the second loop into an infinite one. ; CHECK-LABEL: @func_11( -; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10 +; CHECK: %tmp5 = icmp ult i32 %__key6.0, 10 ; CHECK-NOT: br i1 true, label %noassert68, label %unrolledend define i32 @func_11() nounwind uwtable { @@ -163,7 +163,7 @@ declare void @llvm.trap() noreturn nounwind ; In this case the second loop only has a single iteration, fold the header away ; CHECK-LABEL: @func_12( -; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10 +; CHECK: %tmp5 = icmp ult i32 %__key6.0, 10 ; CHECK: br i1 true, label %noassert68, label %unrolledend define i32 @func_12() nounwind uwtable { entry: diff --git a/test/Transforms/IndVarSimplify/strengthen-overflow.ll b/test/Transforms/IndVarSimplify/strengthen-overflow.ll index 2bafe96e1ccc..6e0538e04d6b 100644 --- a/test/Transforms/IndVarSimplify/strengthen-overflow.ll +++ b/test/Transforms/IndVarSimplify/strengthen-overflow.ll @@ -104,5 +104,89 @@ define i32 @test.unsigned.add.1(i32* %array, i32 %length, i32 %init) { ret i32 42 } +define hidden void @test.shl.exact.equal() { +; CHECK-LABEL: @test.shl.exact.equal +entry: + br label %for.body + +for.body: +; CHECK-LABEL: for.body + %k.021 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %shl = shl i32 1, %k.021 + %shr1 = ashr i32 %shl, 1 +; CHECK: %shr1 = ashr exact i32 %shl, 1 + %shr2 = lshr i32 %shl, 1 +; CHECK: %shr2 = lshr exact i32 %shl, 1 + %inc = add nuw nsw i32 %k.021, 1 + %exitcond = icmp eq i32 %inc, 9 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define hidden void @test.shl.exact.greater() { +; CHECK-LABEL: @test.shl.exact.greater +entry: + br label %for.body + +for.body: +; CHECK-LABEL: for.body + %k.021 = phi i32 [ 3, %entry ], [ %inc, %for.body ] + %shl = shl i32 1, %k.021 + %shr1 = ashr i32 %shl, 2 +; CHECK: %shr1 = ashr exact i32 %shl, 2 + %shr2 = lshr i32 %shl, 2 +; CHECK: %shr2 = lshr exact i32 %shl, 2 + %inc = add nuw nsw i32 %k.021, 1 + %exitcond = icmp eq i32 %inc, 9 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define hidden void @test.shl.exact.unbound(i32 %arg) { +; CHECK-LABEL: @test.shl.exact.unbound +entry: + br label %for.body + +for.body: +; CHECK-LABEL: for.body + %k.021 = phi i32 [ 2, %entry ], [ %inc, %for.body ] + %shl = shl i32 1, %k.021 + %shr1 = ashr i32 %shl, 2 +; CHECK: %shr1 = ashr exact i32 %shl, 2 + %shr2 = lshr i32 %shl, 2 +; CHECK: %shr2 = lshr exact i32 %shl, 2 + %inc = add nuw nsw i32 %k.021, 1 + %exitcond = icmp eq i32 %inc, %arg + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define hidden void @test.shl.nonexact() { +; CHECK-LABEL: @test.shl.nonexact +entry: + br label %for.body + +for.body: +; CHECK-LABEL: for.body + %k.021 = phi i32 [ 2, %entry ], [ %inc, %for.body ] + %shl = shl i32 1, %k.021 + %shr1 = ashr i32 %shl, 3 +; CHECK: %shr1 = ashr i32 %shl, 3 + %shr2 = lshr i32 %shl, 3 +; CHECK: %shr2 = lshr i32 %shl, 3 + %inc = add nuw nsw i32 %k.021, 1 + %exitcond = icmp eq i32 %inc, 9 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + !0 = !{i32 0, i32 2} !1 = !{i32 0, i32 42} diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll index b87cd0550192..2d24cd732ce8 100644 --- a/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -64,7 +64,7 @@ for.end: ; CHECK-LABEL: @test2 ; CHECK: for.body4.us ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %cmp2.us = icmp slt i64 +; CHECK: %cmp2.us = icmp ult i64 ; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32 ; CHECK-NOT: %cmp2.us = icmp slt i32 diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll index b566c147e9b8..1eab70754030 100644 --- a/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll +++ b/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll @@ -170,4 +170,16 @@ define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrsp ret { i32 addrspace(4)*, i1 } %ret } +; Null pointer in local addr space +; CHECK-LABEL: @local_nullptr +; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*) +; CHECK-NOT: i8 addrspace(3)* null +define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) { +entry: + %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*) + %conv = zext i1 %tobool to i32 + store i32 %conv, i32 addrspace(1)* %results, align 4 + ret void +} + attributes #0 = { nounwind } diff --git a/test/Transforms/Inline/ARM/inline-target-attr.ll b/test/Transforms/Inline/ARM/inline-target-attr.ll new file mode 100644 index 000000000000..5bbecd203528 --- /dev/null +++ b/test/Transforms/Inline/ARM/inline-target-attr.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. +; ARM has implemented a target attribute that will verify that the attribute +; sets are compatible. + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) #0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) @baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +define i32 @thumb_fn() #2 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: thumb_fn +; CHECK: call i32 @foo +} + +define i32 @strict_align() #3 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: strict_align +; CHECK: call i32 (...) @baz() +} + +define i32 @soft_float_fn() #4 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: soft_float_fn +; CHECK: call i32 @foo +} + +attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" } +attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" } +attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" } +attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" } +attributes #4 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+soft-float" } diff --git a/test/Transforms/Inline/ARM/lit.local.cfg b/test/Transforms/Inline/ARM/lit.local.cfg new file mode 100644 index 000000000000..236e1d344166 --- /dev/null +++ b/test/Transforms/Inline/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/Inline/cgscc-incremental-invalidate.ll b/test/Transforms/Inline/cgscc-incremental-invalidate.ll index 82d321ccf225..164f7a66a6f3 100644 --- a/test/Transforms/Inline/cgscc-incremental-invalidate.ll +++ b/test/Transforms/Inline/cgscc-incremental-invalidate.ll @@ -11,17 +11,35 @@ ; CHECK: Running analysis: FunctionAnalysisManagerCGSCCProxy on (test1_f, test1_g, test1_h) ; CHECK: Running analysis: DominatorTreeAnalysis on test1_f ; CHECK: Running analysis: DominatorTreeAnalysis on test1_g -; CHECK: Invalidating all non-preserved analyses for: (test1_f, test1_g, test1_h) +; CHECK: Invalidating all non-preserved analyses for: (test1_f) ; CHECK: Invalidating all non-preserved analyses for: test1_f ; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_f +; CHECK: Invalidating analysis: LoopAnalysis on test1_f +; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_f +; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_f +; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h) ; CHECK: Invalidating all non-preserved analyses for: test1_g ; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g -; CHECK: Invalidating all non-preserved analyses for: test1_h -; CHECK-NOT: Invalidating anaylsis: -; CHECK: Running analysis: DominatorTreeAnalysis on test1_h -; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h) +; CHECK: Invalidating analysis: LoopAnalysis on test1_g +; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_g +; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_g ; CHECK: Invalidating all non-preserved analyses for: test1_h ; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h +; CHECK: Invalidating analysis: LoopAnalysis on test1_h +; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_h +; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_h +; CHECK-NOT: Invalidating analysis: +; CHECK: Starting llvm::Function pass manager run. +; CHECK-NEXT: Running pass: DominatorTreeVerifierPass on test1_g +; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_g +; CHECK-NEXT: Finished llvm::Function pass manager run. +; CHECK-NEXT: Starting llvm::Function pass manager run. +; CHECK-NEXT: Running pass: DominatorTreeVerifierPass on test1_h +; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_h +; CHECK-NEXT: Finished llvm::Function pass manager run. +; CHECK-NOT: Invalidating analysis: +; CHECK: Running pass: DominatorTreeVerifierPass on test1_f +; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_f ; An external function used to control branches. declare i1 @flag() @@ -109,3 +127,80 @@ entry: ret void ; CHECK: ret void } + +; The 'test2_' prefixed code works to carefully trigger forming an SCC with +; a dominator tree for one of the functions but not the other and without even +; a function analysis manager proxy for the SCC that things get merged into. +; Without proper handling when updating the call graph this will find a stale +; dominator tree. + +@test2_global = external global i32, align 4 + +define void @test2_hoge(i1 (i32*)* %arg) { +; CHECK-LABEL: define void @test2_hoge( +bb: + %tmp2 = call zeroext i1 %arg(i32* @test2_global) +; CHECK: call zeroext i1 %arg( + br label %bb3 + +bb3: + %tmp5 = call zeroext i1 %arg(i32* @test2_global) +; CHECK: call zeroext i1 %arg( + br i1 %tmp5, label %bb3, label %bb6 + +bb6: + ret void +} + +define zeroext i1 @test2_widget(i32* %arg) { +; CHECK-LABEL: define zeroext i1 @test2_widget( +bb: + %tmp1 = alloca i8, align 1 + %tmp2 = alloca i32, align 4 + call void @test2_quux() +; CHECK-NOT: call +; +; CHECK: call zeroext i1 @test2_widget(i32* @test2_global) +; CHECK-NEXT: br label %[[NEW_BB:.*]] +; +; CHECK: [[NEW_BB]]: +; CHECK-NEXT: call zeroext i1 @test2_widget(i32* @test2_global) +; +; CHECK: {{.*}}: + + call void @test2_hoge.1(i32* %arg) +; CHECK-NEXT: call void @test2_hoge.1( + + %tmp4 = call zeroext i1 @test2_barney(i32* %tmp2) + %tmp5 = zext i1 %tmp4 to i32 + store i32 %tmp5, i32* %tmp2, align 4 + %tmp6 = call zeroext i1 @test2_barney(i32* null) + call void @test2_ham(i8* %tmp1) +; CHECK: call void @test2_ham( + + call void @test2_quux() +; CHECK-NOT: call +; +; CHECK: call zeroext i1 @test2_widget(i32* @test2_global) +; CHECK-NEXT: br label %[[NEW_BB:.*]] +; +; CHECK: [[NEW_BB]]: +; CHECK-NEXT: call zeroext i1 @test2_widget(i32* @test2_global) +; +; CHECK: {{.*}}: + ret i1 true +; CHECK-NEXT: ret i1 true +} + +define internal void @test2_quux() { +; CHECK-NOT: @test2_quux +bb: + call void @test2_hoge(i1 (i32*)* @test2_widget) + ret void +} + +declare void @test2_hoge.1(i32*) + +declare zeroext i1 @test2_barney(i32*) + +declare void @test2_ham(i8*) diff --git a/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll b/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll new file mode 100644 index 000000000000..3c4e08b5b515 --- /dev/null +++ b/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; CHECK: llvm.umul.with.overflow +define i32 @sterix(i32, i8, i64) { +entry: + %conv = zext i32 %0 to i64 + %conv1 = sext i8 %1 to i32 + %mul = mul i32 %conv1, 1945964878 + %sh_prom = trunc i64 %2 to i32 + %shr = lshr i32 %mul, %sh_prom + %conv2 = zext i32 %shr to i64 + %mul3 = mul nuw nsw i64 %conv, %conv2 + %conv6 = and i64 %mul3, 4294967295 + %tobool = icmp ne i64 %conv6, %mul3 + br i1 %tobool, label %lor.end, label %lor.rhs + +lor.rhs: + %and = and i64 %2, %mul3 + %conv4 = trunc i64 %and to i32 + %tobool7 = icmp ne i32 %conv4, 0 + %lnot = xor i1 %tobool7, true + br label %lor.end + +lor.end: + %3 = phi i1 [ true, %entry ], [ %lnot, %lor.rhs ] + %conv8 = zext i1 %3 to i32 + ret i32 %conv8 +} + diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll index 1baecb4a13a3..04f7be01eaf5 100644 --- a/test/Transforms/InstCombine/and-or-not.ll +++ b/test/Transforms/InstCombine/and-or-not.ll @@ -570,10 +570,8 @@ define i32 @xor_to_xnor1(float %fa, float %fb) { ; CHECK-LABEL: @xor_to_xnor1( ; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] -; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[A]], [[B]] -; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %a = fptosi float %fa to i32 @@ -591,10 +589,8 @@ define i32 @xor_to_xnor2(float %fa, float %fb) { ; CHECK-LABEL: @xor_to_xnor2( ; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32 -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[B]] -; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[B]], [[A]] -; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %a = fptosi float %fa to i32 @@ -612,10 +608,8 @@ define i32 @xor_to_xnor3(float %fa, float %fb) { ; CHECK-LABEL: @xor_to_xnor3( ; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32 -; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[A]], [[B]] -; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[B]] -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %a = fptosi float %fa to i32 @@ -633,10 +627,8 @@ define i32 @xor_to_xnor4(float %fa, float %fb) { ; CHECK-LABEL: @xor_to_xnor4( ; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32 -; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[A]], [[B]] -; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[B]], [[A]] -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[A]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %a = fptosi float %fa to i32 diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll index 91678a91962a..260e2330996e 100644 --- a/test/Transforms/InstCombine/bswap-fold.ll +++ b/test/Transforms/InstCombine/bswap-fold.ll @@ -1,35 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s -define i1 @test1(i16 %t) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 %t, 256 -; CHECK-NEXT: ret i1 [[TMP2]] -; - %tmp1 = call i16 @llvm.bswap.i16( i16 %t ) - %tmp2 = icmp eq i16 %tmp1, 1 - ret i1 %tmp2 -} - -define i1 @test2(i32 %tmp) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = icmp eq i32 %tmp, 16777216 -; CHECK-NEXT: ret i1 [[TMP_UPGRD_1]] -; - %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp ) - %tmp.upgrd.1 = icmp eq i32 %tmp34, 1 - ret i1 %tmp.upgrd.1 -} - -define i1 @test3(i64 %tmp) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: [[TMP_UPGRD_2:%.*]] = icmp eq i64 %tmp, 72057594037927936 -; CHECK-NEXT: ret i1 [[TMP_UPGRD_2]] -; - %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp ) - %tmp.upgrd.2 = icmp eq i64 %tmp34, 1 - ret i1 %tmp.upgrd.2 -} - ; rdar://5992453 ; A & 255 define i32 @test4(i32 %a) nounwind { @@ -241,6 +212,136 @@ define i64 @bs_xor64(i64 %a, i64 %b) #0 { ret i64 %tmp3 } +define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-LABEL: @bs_and32vec( +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) + %tmp3 = and <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-LABEL: @bs_or32vec( +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) + %tmp3 = or <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-LABEL: @bs_xor32vec( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) + %tmp3 = xor <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-LABEL: @bs_and32ivec( +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %tmp2 = and <2 x i32> %tmp1, <i32 100001, i32 100001> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-LABEL: @bs_or32ivec( +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %tmp2 = or <2 x i32> %tmp1, <i32 100001, i32 100001> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK-LABEL: @bs_xor32ivec( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; + %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %tmp2 = xor <2 x i32> %tmp1, <i32 100001, i32 100001> + ret <2 x i32> %tmp2 +} + +define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 { +; CHECK-LABEL: @bs_and64_multiuse1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]] +; CHECK-NEXT: ret i64 [[TMP5]] +; + %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) + %tmp3 = and i64 %tmp1, %tmp2 + %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps + %tmp5 = mul i64 %tmp4, %tmp2 ; to increase use count of the bswaps + ret i64 %tmp5 +} + +define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 { +; CHECK-LABEL: @bs_and64_multiuse2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP4]] +; + %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) + %tmp3 = and i64 %tmp1, %tmp2 + %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps + ret i64 %tmp4 +} + +define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 { +; CHECK-LABEL: @bs_and64_multiuse3( +; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i64 [[TMP4]] +; + %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) + %tmp3 = and i64 %tmp1, %tmp2 + %tmp4 = mul i64 %tmp3, %tmp2 ; to increase use count of the bswaps + ret i64 %tmp4 +} + +define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 { +; CHECK-LABEL: @bs_and64i_multiuse( +; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1000000001 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = and i64 %tmp1, 1000000001 + %tmp3 = mul i64 %tmp2, %tmp1 ; to increase use count of the bswap + ret i64 %tmp3 +} + declare i16 @llvm.bswap.i16(i16) declare i32 @llvm.bswap.i32(i32) declare i64 @llvm.bswap.i64(i64) +declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) diff --git a/test/Transforms/InstCombine/cmp-intrinsic.ll b/test/Transforms/InstCombine/cmp-intrinsic.ll new file mode 100644 index 000000000000..7fc1d12916bf --- /dev/null +++ b/test/Transforms/InstCombine/cmp-intrinsic.ll @@ -0,0 +1,123 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) +declare i33 @llvm.cttz.i33(i33, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i8 @llvm.ctpop.i8(i8) +declare i11 @llvm.ctpop.i11(i11) +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) +declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) + +define i1 @bswap_eq_i16(i16 %x) { +; CHECK-LABEL: @bswap_eq_i16( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 %x, 256 +; CHECK-NEXT: ret i1 [[CMP]] +; + %bs = call i16 @llvm.bswap.i16(i16 %x) + %cmp = icmp eq i16 %bs, 1 + ret i1 %cmp +} + +define i1 @bswap_ne_i32(i32 %x) { +; CHECK-LABEL: @bswap_ne_i32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, 33554432 +; CHECK-NEXT: ret i1 [[CMP]] +; + %bs = tail call i32 @llvm.bswap.i32(i32 %x) + %cmp = icmp ne i32 %bs, 2 + ret i1 %cmp +} + +define <2 x i1> @bswap_eq_v2i64(<2 x i64> %x) { +; CHECK-LABEL: @bswap_eq_v2i64( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> %x, <i64 216172782113783808, i64 216172782113783808> +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %bs = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x) + %cmp = icmp eq <2 x i64> %bs, <i64 3, i64 3> + ret <2 x i1> %cmp +} + +define i1 @ctlz_eq_bitwidth_i32(i32 %x) { +; CHECK-LABEL: @ctlz_eq_bitwidth_i32( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) + %cmp = icmp eq i32 %lz, 32 + ret i1 %cmp +} + +define <2 x i1> @ctlz_ne_bitwidth_v2i32(<2 x i32> %a) { +; CHECK-LABEL: @ctlz_ne_bitwidth_v2i32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> %a, zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) + %cmp = icmp ne <2 x i32> %x, <i32 32, i32 32> + ret <2 x i1> %cmp +} + +define i1 @cttz_ne_bitwidth_i33(i33 %x) { +; CHECK-LABEL: @cttz_ne_bitwidth_i33( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i33 %x, 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) + %cmp = icmp ne i33 %tz, 33 + ret i1 %cmp +} + +define <2 x i1> @cttz_eq_bitwidth_v2i32(<2 x i32> %a) { +; CHECK-LABEL: @cttz_eq_bitwidth_v2i32( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> %a, zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) + %cmp = icmp eq <2 x i32> %x, <i32 32, i32 32> + ret <2 x i1> %cmp +} + +define i1 @ctpop_eq_zero_i11(i11 %x) { +; CHECK-LABEL: @ctpop_eq_zero_i11( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i11 %x, 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %pop = tail call i11 @llvm.ctpop.i11(i11 %x) + %cmp = icmp eq i11 %pop, 0 + ret i1 %cmp +} + +define <2 x i1> @ctpop_ne_zero_v2i32(<2 x i32> %x) { +; CHECK-LABEL: @ctpop_ne_zero_v2i32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> %x, zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x) + %cmp = icmp ne <2 x i32> %pop, zeroinitializer + ret <2 x i1> %cmp +} + +define i1 @ctpop_eq_bitwidth_i8(i8 %x) { +; CHECK-LABEL: @ctpop_eq_bitwidth_i8( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %x, -1 +; CHECK-NEXT: ret i1 [[CMP]] +; + %pop = tail call i8 @llvm.ctpop.i8(i8 %x) + %cmp = icmp eq i8 %pop, 8 + ret i1 %cmp +} + +define <2 x i1> @ctpop_ne_bitwidth_v2i32(<2 x i32> %x) { +; CHECK-LABEL: @ctpop_ne_bitwidth_v2i32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> %x, <i32 -1, i32 -1> +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x) + %cmp = icmp ne <2 x i32> %pop, <i32 32, i32 32> + ret <2 x i1> %cmp +} + diff --git a/test/Transforms/InstCombine/consecutive-fences.ll b/test/Transforms/InstCombine/consecutive-fences.ll index 6f1c41277386..8ecb399f39cb 100644 --- a/test/Transforms/InstCombine/consecutive-fences.ll +++ b/test/Transforms/InstCombine/consecutive-fences.ll @@ -4,7 +4,7 @@ ; CHECK-LABEL: define void @tinkywinky ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: fence singlethread acquire +; CHECK-NEXT: fence syncscope("singlethread") acquire ; CHECK-NEXT: ret void ; CHECK-NEXT: } @@ -12,21 +12,21 @@ define void @tinkywinky() { fence seq_cst fence seq_cst fence seq_cst - fence singlethread acquire - fence singlethread acquire - fence singlethread acquire + fence syncscope("singlethread") acquire + fence syncscope("singlethread") acquire + fence syncscope("singlethread") acquire ret void } ; CHECK-LABEL: define void @dipsy ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: fence singlethread seq_cst +; CHECK-NEXT: fence syncscope("singlethread") seq_cst ; CHECK-NEXT: ret void ; CHECK-NEXT: } define void @dipsy() { fence seq_cst - fence singlethread seq_cst + fence syncscope("singlethread") seq_cst ret void } diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll index 127fde10e9f7..a12f4206b1c6 100644 --- a/test/Transforms/InstCombine/icmp.ll +++ b/test/Transforms/InstCombine/icmp.ll @@ -2979,9 +2979,7 @@ declare i32 @llvm.bswap.i32(i32) define i1 @bswap_ne(i32 %x, i32 %y) { ; CHECK-LABEL: @bswap_ne( -; CHECK-NEXT: [[SWAPX:%.*]] = call i32 @llvm.bswap.i32(i32 %x) -; CHECK-NEXT: [[SWAPY:%.*]] = call i32 @llvm.bswap.i32(i32 %y) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[SWAPX]], [[SWAPY]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, %y ; CHECK-NEXT: ret i1 [[CMP]] ; %swapx = call i32 @llvm.bswap.i32(i32 %x) @@ -2994,9 +2992,7 @@ declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) define <8 x i1> @bswap_vec_eq(<8 x i16> %x, <8 x i16> %y) { ; CHECK-LABEL: @bswap_vec_eq( -; CHECK-NEXT: [[SWAPX:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x) -; CHECK-NEXT: [[SWAPY:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[SWAPX]], [[SWAPY]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> %x, %y ; CHECK-NEXT: ret <8 x i1> [[CMP]] ; %swapx = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x) @@ -3009,9 +3005,7 @@ declare i64 @llvm.bitreverse.i64(i64) define i1 @bitreverse_eq(i64 %x, i64 %y) { ; CHECK-LABEL: @bitreverse_eq( -; CHECK-NEXT: [[REVX:%.*]] = call i64 @llvm.bitreverse.i64(i64 %x) -; CHECK-NEXT: [[REVY:%.*]] = call i64 @llvm.bitreverse.i64(i64 %y) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[REVX]], [[REVY]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 %x, %y ; CHECK-NEXT: ret i1 [[CMP]] ; %revx = call i64 @llvm.bitreverse.i64(i64 %x) @@ -3024,9 +3018,7 @@ declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) { ; CHECK-LABEL: @bitreverse_vec_ne( -; CHECK-NEXT: [[REVX:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x) -; CHECK-NEXT: [[REVY:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> [[REVX]], [[REVY]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> %x, %y ; CHECK-NEXT: ret <8 x i1> [[CMP]] ; %revx = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x) diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index c294d79f15ef..8d2f06edcaf3 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -475,66 +475,6 @@ define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) { ret <2 x i1> %res } -define void @cmp.simplify(i32 %a, i32 %b, i1* %c) { - %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone - %lz.cmp = icmp eq i32 %lz, 32 - store volatile i1 %lz.cmp, i1* %c - %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone - %tz.cmp = icmp ne i32 %tz, 32 - store volatile i1 %tz.cmp, i1* %c - %pop0 = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone - %pop0.cmp = icmp eq i32 %pop0, 0 - store volatile i1 %pop0.cmp, i1* %c - %pop1 = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone - %pop1.cmp = icmp eq i32 %pop1, 32 - store volatile i1 %pop1.cmp, i1* %c - ret void -; CHECK: @cmp.simplify -; CHECK-NEXT: %lz.cmp = icmp eq i32 %a, 0 -; CHECK-NEXT: store volatile i1 %lz.cmp, i1* %c -; CHECK-NEXT: %tz.cmp = icmp ne i32 %a, 0 -; CHECK-NEXT: store volatile i1 %tz.cmp, i1* %c -; CHECK-NEXT: %pop0.cmp = icmp eq i32 %b, 0 -; CHECK-NEXT: store volatile i1 %pop0.cmp, i1* %c -; CHECK-NEXT: %pop1.cmp = icmp eq i32 %b, -1 -; CHECK-NEXT: store volatile i1 %pop1.cmp, i1* %c -} - -define <2 x i1> @ctlz_cmp_vec(<2 x i32> %a) { -; CHECK-LABEL: @ctlz_cmp_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> %a, zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[CMP]] -; - %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind readnone - %cmp = icmp eq <2 x i32> %x, <i32 32, i32 32> - ret <2 x i1> %cmp -} - -define <2 x i1> @cttz_cmp_vec(<2 x i32> %a) { -; CHECK-LABEL: @cttz_cmp_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> %a, zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[CMP]] -; - %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) nounwind readnone - %cmp = icmp ne <2 x i32> %x, <i32 32, i32 32> - ret <2 x i1> %cmp -} - -define void @ctpop_cmp_vec(<2 x i32> %a, <2 x i1>* %b) { - %pop0 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) nounwind readnone - %pop0.cmp = icmp eq <2 x i32> %pop0, zeroinitializer - store volatile <2 x i1> %pop0.cmp, <2 x i1>* %b - %pop1 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) nounwind readnone - %pop1.cmp = icmp eq <2 x i32> %pop1, < i32 32, i32 32 > - store volatile <2 x i1> %pop1.cmp, <2 x i1>* %b - ret void -; CHECK-LABEL: @ctpop_cmp_vec( -; CHECK-NEXT: %pop0.cmp = icmp eq <2 x i32> %a, zeroinitializer -; CHECK-NEXT: store volatile <2 x i1> %pop0.cmp, <2 x i1>* %b -; CHECK-NEXT: %pop1.cmp = icmp eq <2 x i32> %a, <i32 -1, i32 -1> -; CHECK-NEXT: store volatile <2 x i1> %pop1.cmp, <2 x i1>* %b -} - define i32 @ctlz_undef(i32 %Value) { ; CHECK-LABEL: @ctlz_undef( ; CHECK-NEXT: ret i32 undef diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll index 2164f0df8d27..947971c6c83b 100644 --- a/test/Transforms/InstCombine/or-xor.ll +++ b/test/Transforms/InstCombine/or-xor.ll @@ -348,10 +348,8 @@ define i8 @test18(i8 %A, i8 %B) { ; ((x | y) ^ (~x | ~y)) -> ~(x ^ y) define i32 @test19(i32 %x, i32 %y) { ; CHECK-LABEL: @test19( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[X]], [[Y]] -; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %noty = xor i32 %y, -1 @@ -365,10 +363,8 @@ define i32 @test19(i32 %x, i32 %y) { ; ((x | y) ^ (~y | ~x)) -> ~(x ^ y) define i32 @test20(i32 %x, i32 %y) { ; CHECK-LABEL: @test20( -; CHECK-NEXT: [[OR1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR2_DEMORGAN:%.*]] = and i32 [[Y]], [[X]] -; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %noty = xor i32 %y, -1 @@ -382,10 +378,8 @@ define i32 @test20(i32 %x, i32 %y) { ; ((~x | ~y) ^ (x | y)) -> ~(x ^ y) define i32 @test21(i32 %x, i32 %y) { ; CHECK-LABEL: @test21( -; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[X]], [[Y]] -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %noty = xor i32 %y, -1 @@ -399,10 +393,8 @@ define i32 @test21(i32 %x, i32 %y) { ; ((~x | ~y) ^ (y | x)) -> ~(x ^ y) define i32 @test22(i32 %x, i32 %y) { ; CHECK-LABEL: @test22( -; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1 -; CHECK-NEXT: [[OR2:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1 ; CHECK-NEXT: ret i32 [[XOR]] ; %noty = xor i32 %y, -1 diff --git a/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/test/Transforms/InstCombine/pr33689_same_bitwidth.ll new file mode 100644 index 000000000000..e5dd019b9b51 --- /dev/null +++ b/test/Transforms/InstCombine/pr33689_same_bitwidth.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine %s -o - | FileCheck %s + +; All the "useless" instructions should be removed and we shouldn't crash. + +target datalayout = "p:16:16" + +%i64_t = type i64 + +@a = external global i16 +@b = external global i16* + +define void @f() { +; CHECK-LABEL: @f( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[TMP12:%.*]] = alloca [2 x i32], align 8 +; CHECK-NEXT: [[TMP12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP12]], i16 0, i16 0 +; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint [2 x i32]* [[TMP12]] to i16 +; CHECK-NEXT: store i16 [[TMP8]], i16* @a, align 2 +; CHECK-NEXT: unreachable +; CHECK: bb2: +; CHECK-NEXT: [[TMP9:%.*]] = load i16*, i16** @b, align 2 +; CHECK-NEXT: store i16 0, i16* [[TMP9]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP12_SUB]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -1 +; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP12_SUB]], align 8 +; CHECK-NEXT: ret void +; +bb0: + %tmp1 = alloca %i64_t + %tmp2 = bitcast %i64_t* %tmp1 to i32* + %useless3 = bitcast %i64_t* %tmp1 to i16* + %useless4 = getelementptr inbounds i16, i16* %useless3, i16 undef + %useless5 = bitcast i16* %useless4 to i32* + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb0 + %useless6 = insertvalue [1 x i32*] undef, i32* %tmp2, 0 + %useless7 = insertvalue [1 x i32*] %useless6, i32* null, 0 + %tmp8 = ptrtoint i32* %tmp2 to i16 + store i16 %tmp8, i16* @a + unreachable + +bb2: ; preds = %bb0 + %tmp9 = load i16*, i16** @b + store i16 0, i16* %tmp9 + %tmp10 = load i32, i32* %tmp2 + %tmp11 = sub i32 %tmp10, 1 + store i32 %tmp11, i32* %tmp2 + ret void +} diff --git a/test/Transforms/InstCombine/select-implied.ll b/test/Transforms/InstCombine/select-implied.ll index 2100e3eae008..2558745c18f3 100644 --- a/test/Transforms/InstCombine/select-implied.ll +++ b/test/Transforms/InstCombine/select-implied.ll @@ -121,3 +121,80 @@ end: declare void @foo(i32) declare i32 @bar(i32) + +; CHECK-LABEL: @test_and +; CHECK: tpath: +; CHECK-NOT: select +; CHECK: ret i32 313 +define i32 @test_and(i32 %a, i32 %b) { +entry: + %cmp1 = icmp ne i32 %a, 0 + %cmp2 = icmp ne i32 %b, 0 + %and = and i1 %cmp1, %cmp2 + br i1 %and, label %tpath, label %end + +tpath: + %cmp3 = icmp eq i32 %a, 0 ;; <-- implied false + %c = select i1 %cmp3, i32 0, i32 313 + ret i32 %c + +end: + ret i32 0 +} + +; cmp1 and cmp2 are false on the 'fpath' path and thus cmp3 is true. +; CHECK-LABEL: @test_or1 +; CHECK: fpath: +; CHECK-NOT: select +; CHECK: ret i32 37 +define i32 @test_or1(i32 %a, i32 %b) { +entry: + %cmp1 = icmp eq i32 %a, 0 + %cmp2 = icmp eq i32 %b, 0 + %or = or i1 %cmp1, %cmp2 + br i1 %or, label %end, label %fpath + +fpath: + %cmp3 = icmp ne i32 %a, 0 ;; <-- implied true + %c = select i1 %cmp3, i32 37, i32 0 + ret i32 %c + +end: + ret i32 0 +} + +; LHS ==> RHS by definition (true -> true) +; CHECK-LABEL: @test6 +; CHECK: taken: +; CHECK-NOT: select +; CHECK: call void @foo(i32 10) +define void @test6(i32 %a, i32 %b) { + %cmp1 = icmp eq i32 %a, %b + br i1 %cmp1, label %taken, label %end + +taken: + %c = select i1 %cmp1, i32 10, i32 0 + call void @foo(i32 %c) + br label %end + +end: + ret void +} + +; LHS ==> RHS by definition (false -> false) +; CHECK-LABEL: @test7 +; CHECK: taken: +; CHECK-NOT: select +; CHECK: call void @foo(i32 11) +define void @test7(i32 %a, i32 %b) { + %cmp1 = icmp eq i32 %a, %b + br i1 %cmp1, label %end, label %taken + +taken: + %c = select i1 %cmp1, i32 0, i32 11 + call void @foo(i32 %c) + br label %end + +end: + ret void +} diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll index c8f2a50b72ed..acfa053daaf8 100644 --- a/test/Transforms/InstCombine/select.ll +++ b/test/Transforms/InstCombine/select.ll @@ -1370,3 +1370,10 @@ define i8 @assume_cond_false(i1 %cond, i8 %x, i8 %y) { ret i8 %sel } +; Test case to make sure we don't consider an all ones float values for converting the select into a sext. +define <4 x float> @PR33721(<4 x float> %w) { +entry: + %0 = fcmp ole <4 x float> %w, zeroinitializer + %1 = select <4 x i1> %0, <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer + ret <4 x float> %1 +} diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index 5938f9d7321d..715c9413a819 100644 --- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -854,3 +854,32 @@ define void @load_factor2_fp128(<4 x fp128>* %ptr) { %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3> ret void } + +define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) { +; NEON-LABEL: @load_factor2_wide_pointer( +; NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32*>* %ptr to i32* +; NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +; NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4) +; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 +; NEON-NEXT: [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x i32*> +; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 +; NEON-NEXT: [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x i32*> +; NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8 +; NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +; NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP8]], i32 4) +; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 +; NEON-NEXT: [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*> +; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 +; NEON-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x i32*> +; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32*> [[TMP4]], <4 x i32*> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NEON-NEXT: ret void +; NO_NEON-LABEL: @load_factor2_wide_pointer( +; NO_NEON-NOT: @llvm.arm.neon +; NO_NEON: ret void +; + %interleaved.vec = load <16 x i32*>, <16 x i32*>* %ptr, align 4 + %v0 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret void +} diff --git a/test/Transforms/LoopRotate/pr33701.ll b/test/Transforms/LoopRotate/pr33701.ll new file mode 100644 index 000000000000..ed162b120982 --- /dev/null +++ b/test/Transforms/LoopRotate/pr33701.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output + +define void @func() { +bb0: + br label %bb1 + +bb1: ; preds = %bb4, %bb0 + %0 = phi i16 [ %2, %bb4 ], [ 0, %bb0 ] + %1 = icmp sle i16 %0, 2 + br i1 %1, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + br i1 undef, label %bb6, label %bb4 + +bb3: ; No predecessors! + br label %bb6 + +bb4: ; preds = %bb2 + %2 = add i16 undef, 1 + br label %bb1 + +bb5: ; preds = %bb1 + br label %bb6 + +bb6: ; preds = %bb5, %bb3, %bb2 + unreachable +} diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll index dcd068191e10..ea3f60772319 100644 --- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -14,8 +14,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; current LSR cost model. ; CHECK-NOT: = ptrtoint i8* undef to i64 ; CHECK: .lr.ph -; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1 -; CHECK: sub i64 [[TMP]], %tmp6 +; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 +; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} ; CHECK: ret void define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { bb: diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll new file mode 100644 index 000000000000..4ce6f1a79fbf --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -loop-reduce -lsr-filter-same-scaled-reg=true -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.ham = type { i8, i8, [5 x i32], i64, i64, i64 } + +@global = external local_unnamed_addr global %struct.ham, align 8 + +define void @foo() local_unnamed_addr { +bb: + %tmp = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 3), align 8 + %tmp1 = and i64 %tmp, 1792 + %tmp2 = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 4), align 8 + %tmp3 = add i64 %tmp1, %tmp2 + %tmp4 = load i8*, i8** null, align 8 + %tmp5 = getelementptr inbounds i8, i8* %tmp4, i64 0 + %tmp6 = sub i64 0, %tmp3 + %tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp6 + %tmp8 = inttoptr i64 0 to i8* + br label %bb9 + +; Without filtering non-optimal formulae with the same ScaledReg and Scale, the strategy +; to narrow LSR search space by picking winner reg will generate only one lsr.iv and +; unoptimal result. +; CHECK-LABEL: @foo( +; CHECK: bb9: +; CHECK-NEXT: = phi i8* +; CHECK-NEXT: = phi i8* + +bb9: ; preds = %bb12, %bb + %tmp10 = phi i8* [ %tmp7, %bb ], [ %tmp16, %bb12 ] + %tmp11 = phi i8* [ %tmp8, %bb ], [ %tmp17, %bb12 ] + br i1 false, label %bb18, label %bb12 + +bb12: ; preds = %bb9 + %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 8 + %tmp14 = bitcast i8* %tmp13 to i64* + %tmp15 = load i64, i64* %tmp14, align 1 + %tmp16 = getelementptr inbounds i8, i8* %tmp10, i64 16 + %tmp17 = getelementptr inbounds i8, i8* %tmp11, i64 16 + br label %bb9 + +bb18: ; preds = %bb9 + %tmp19 = icmp ugt i8* %tmp11, null + %tmp20 = getelementptr inbounds i8, i8* %tmp10, i64 8 + %tmp21 = getelementptr inbounds i8, i8* %tmp11, i64 8 + %tmp22 = select i1 %tmp19, i8* %tmp10, i8* %tmp20 + %tmp23 = select i1 %tmp19, i8* %tmp11, i8* %tmp21 + br label %bb24 + +bb24: ; preds = %bb24, %bb18 + %tmp25 = phi i8* [ %tmp27, %bb24 ], [ %tmp22, %bb18 ] + %tmp26 = phi i8* [ %tmp29, %bb24 ], [ %tmp23, %bb18 ] + %tmp27 = getelementptr inbounds i8, i8* %tmp25, i64 1 + %tmp28 = load i8, i8* %tmp25, align 1 + %tmp29 = getelementptr inbounds i8, i8* %tmp26, i64 1 + store i8 %tmp28, i8* %tmp26, align 1 + %tmp30 = icmp eq i8* %tmp29, %tmp5 + br label %bb24 +} diff --git a/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index 1f31a133e34d..73672e14f78a 100644 --- a/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -1,29 +1,52 @@ -; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S| FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -S | FileCheck %s -check-prefix=EPILOG-NO-IC +; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=EPILOG ; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine +; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-runtime-epilog=false -unroll-count=2 -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -; the second RUN generates an epilog remainder block for all the test +; the third and fifth RUNs generate an epilog/prolog remainder block for all the test ; cases below (it does not generate a loop). ; test with three exiting and three exit blocks. ; none of the exit blocks have successors define void @test1(i64 %trip, i1 %cond) { -; CHECK-LABEL: test1 -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]] -; CHECK: entry.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK-LABEL: loop_latch.epil: -; CHECK-NEXT: %epil.iter.sub = add i64 %epil.iter, -1 -; CHECK-NEXT: %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0 -; CHECK-NEXT: br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil -; CHECK-LABEL: loop_latch.7: -; CHECK-NEXT: %niter.nsub.7 = add i64 %niter, -8 -; CHECK-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0 -; CHECK-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header +; EPILOG: test1( +; EPILOG-NEXT: entry: +; EPILOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 +; EPILOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 +; EPILOG-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; EPILOG-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]] +; EPILOG: entry.new: +; EPILOG-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]] +; EPILOG-NEXT: br label [[LOOP_HEADER:%.*]] +; EPILOG: loop_latch.epil: +; EPILOG-NEXT: %epil.iter.sub = add i64 %epil.iter, -1 +; EPILOG-NEXT: %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0 +; EPILOG-NEXT: br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil +; EPILOG: loop_latch.7: +; EPILOG-NEXT: %niter.nsub.7 = add i64 %niter, -8 +; EPILOG-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0 +; EPILOG-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header + +; PROLOG: test1( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 +; PROLOG-NEXT: [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader +; PROLOG: loop_header.prol: +; PROLOG-NEXT: %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ] +; PROLOG-NEXT: %prol.iter = phi i64 [ [[XTRAITER]], %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ] +; PROLOG-NEXT: br i1 %cond, label %loop_latch.prol, label %loop_exiting_bb1.prol +; PROLOG: loop_latch.prol: +; PROLOG-NEXT: %iv_next.prol = add i64 %iv.prol, 1 +; PROLOG-NEXT: %prol.iter.sub = add i64 %prol.iter, -1 +; PROLOG-NEXT: %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0 +; PROLOG-NEXT: br i1 %prol.iter.cmp, label %loop_header.prol.loopexit.unr-lcssa, label %loop_header.prol +; PROLOG: loop_latch.7: +; PROLOG-NEXT: %iv_next.7 = add i64 %iv, 8 +; PROLOG-NEXT: %cmp.7 = icmp eq i64 %iv_next.7, %trip +; PROLOG-NEXT: br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header entry: br label %loop_header @@ -59,17 +82,30 @@ exit2.loopexit: ; %sum.02 and %add. Both of these are incoming values for phi from every exiting ; unrolled block. define i32 @test2(i32* nocapture %a, i64 %n) { -; CHECK-LABEL: test2 -; CHECK-LABEL: for.exit2.loopexit: -; CHECK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], -; CHECK-NEXT: br label %for.exit2 -; CHECK-LABEL: for.exit2.loopexit2: -; CHECK-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] -; CHECK-NEXT: br label %for.exit2 -; CHECK-LABEL: for.exit2: -; CHECK-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ] -; CHECK-NEXT: ret i32 %retval -; CHECK: %niter.nsub.7 = add i64 %niter, -8 +; EPILOG: test2( +; EPILOG: for.exit2.loopexit: +; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], +; EPILOG-NEXT: br label %for.exit2 +; EPILOG: for.exit2.loopexit2: +; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] +; EPILOG-NEXT: br label %for.exit2 +; EPILOG: for.exit2: +; EPILOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ] +; EPILOG-NEXT: ret i32 %retval +; EPILOG: %niter.nsub.7 = add i64 %niter, -8 + +; PROLOG: test2( +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], +; PROLOG-NEXT: br label %for.exit2 +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ] +; PROLOG-NEXT: br label %for.exit2 +; PROLOG: for.exit2: +; PROLOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ] +; PROLOG-NEXT: ret i32 %retval +; PROLOG: %indvars.iv.next.7 = add i64 %indvars.iv, 8 + entry: br label %header @@ -102,25 +138,42 @@ for.exit2: ; test with two exiting and three exit blocks. ; the non-latch exiting block has a switch. define void @test3(i64 %trip, i64 %add) { -; CHECK-LABEL: test3 -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]] -; CHECK: entry.new: -; CHECK-NEXT: %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK-LABEL: loop_header: -; CHECK-NEXT: %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ] -; CHECK-NEXT: %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ] -; CHECK-LABEL: loop_exiting_bb1.7: -; CHECK-NEXT: switch i64 %sum.next.6, label %loop_latch.7 -; CHECK-LABEL: loop_latch.7: -; CHECK-NEXT: %sum.next.7 = add i64 %sum.next.6, %add -; CHECK-NEXT: %niter.nsub.7 = add i64 %niter, -8 -; CHECK-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0 -; CHECK-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header +; EPILOG: test3( +; EPILOG-NEXT: entry: +; EPILOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 +; EPILOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 +; EPILOG-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; EPILOG-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]] +; EPILOG: entry.new: +; EPILOG-NEXT: %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]] +; EPILOG-NEXT: br label [[LOOP_HEADER:%.*]] +; EPILOG: loop_header: +; EPILOG-NEXT: %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ] +; EPILOG-NEXT: %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ] +; EPILOG: loop_exiting_bb1.7: +; EPILOG-NEXT: switch i64 %sum.next.6, label %loop_latch.7 +; EPILOG: loop_latch.7: +; EPILOG-NEXT: %sum.next.7 = add i64 %sum.next.6, %add +; EPILOG-NEXT: %niter.nsub.7 = add i64 %niter, -8 +; EPILOG-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0 +; EPILOG-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header + +; PROLOG: test3( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 +; PROLOG-NEXT: [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader +; PROLOG: loop_header: +; PROLOG-NEXT: %iv = phi i64 [ %iv.unr, %entry.new ], [ %iv_next.7, %loop_latch.7 ] +; PROLOG-NEXT: %sum = phi i64 [ %sum.unr, %entry.new ], [ %sum.next.7, %loop_latch.7 ] +; PROLOG: loop_exiting_bb1.7: +; PROLOG-NEXT: switch i64 %sum.next.6, label %loop_latch.7 +; PROLOG: loop_latch.7: +; PROLOG-NEXT: %iv_next.7 = add nsw i64 %iv, 8 +; PROLOG-NEXT: %sum.next.7 = add i64 %sum.next.6, %add +; PROLOG-NEXT: %cmp.7 = icmp eq i64 %iv_next.7, %trip +; PROLOG-NEXT: br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header entry: br label %loop_header @@ -153,9 +206,13 @@ exit2.loopexit: ; FIXME: Support multiple exiting blocks to the same latch exit block. define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) { -; CHECK-LABEL: test4 -; CHECK-NOT: .unr -; CHECK-NOT: .epil +; EPILOG: test4( +; EPILOG-NOT: .unr +; EPILOG-NOT: .epil + +; PROLOG: test4( +; PROLOG-NOT: .unr +; PROLOG-NOT: .prol entry: br label %header @@ -184,21 +241,68 @@ for.exit2: ret i32 42 } +; FIXME: Support multiple exiting blocks to the unique exit block. +define void @unique_exit(i32 %arg) { +; EPILOG: unique_exit( +; EPILOG-NOT: .unr +; EPILOG-NOT: .epil + +; PROLOG: unique_exit( +; PROLOG-NOT: .unr +; PROLOG-NOT: .prol +entry: + %tmp = icmp sgt i32 undef, %arg + br i1 %tmp, label %preheader, label %returnblock + +preheader: ; preds = %entry + br label %header + +LoopExit: ; preds = %header, %latch + %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ] + br label %returnblock + +returnblock: ; preds = %LoopExit, %entry + %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ] + ret void + +header: ; preds = %preheader, %latch + %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ] + %inc = add nsw i32 %tmp4, 1 + br i1 true, label %LoopExit, label %latch + +latch: ; preds = %header + %cmp = icmp slt i32 %inc, undef + br i1 %cmp, label %header, label %LoopExit +} + ; two exiting and two exit blocks. ; the non-latch exiting block has duplicate edges to the non-latch exit block. define i64 @test5(i64 %trip, i64 %add, i1 %cond) { -; CHECK-LABEL: test5 -; CHECK-LABEL: exit1.loopexit: -; CHECK-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], -; CHECK-NEXT: br label %exit1 -; CHECK-LABEL: exit1.loopexit2: -; CHECK-NEXT: %ivy.epil = add i64 %iv.epil, %add -; CHECK-NEXT: br label %exit1 -; CHECK-LABEL: exit1: -; CHECK-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ] -; CHECK-NEXT: ret i64 %result -; CHECK-LABEL: loop_latch.7: -; CHECK: %niter.nsub.7 = add i64 %niter, -8 +; EPILOG: test5( +; EPILOG: exit1.loopexit: +; EPILOG-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], +; EPILOG-NEXT: br label %exit1 +; EPILOG: exit1.loopexit2: +; EPILOG-NEXT: %ivy.epil = add i64 %iv.epil, %add +; EPILOG-NEXT: br label %exit1 +; EPILOG: exit1: +; EPILOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ] +; EPILOG-NEXT: ret i64 %result +; EPILOG: loop_latch.7: +; EPILOG: %niter.nsub.7 = add i64 %niter, -8 + +; PROLOG: test5( +; PROLOG: exit1.loopexit: +; PROLOG-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], +; PROLOG-NEXT: br label %exit1 +; PROLOG: exit1.loopexit1: +; PROLOG-NEXT: %ivy.prol = add i64 %iv.prol, %add +; PROLOG-NEXT: br label %exit1 +; PROLOG: exit1: +; PROLOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ] +; PROLOG-NEXT: ret i64 %result +; PROLOG: loop_latch.7: +; PROLOG: %iv_next.7 = add nsw i64 %iv, 8 entry: br label %loop_header @@ -230,18 +334,31 @@ latchexit: ; test when exit blocks have successors. define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { -; CHECK-LABEL: test6 -; CHECK-LABEL: for.exit2.loopexit: -; CHECK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], -; CHECK-NEXT: br label %for.exit2 -; CHECK-LABEL: for.exit2.loopexit2: -; CHECK-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] -; CHECK-NEXT: br label %for.exit2 -; CHECK-LABEL: for.exit2: -; CHECK-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ] -; CHECK-NEXT: br i1 %cond, label %exit_true, label %exit_false -; CHECK-LABEL: latch.7: -; CHECK: %niter.nsub.7 = add i64 %niter, -8 +; EPILOG: test6( +; EPILOG: for.exit2.loopexit: +; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], +; EPILOG-NEXT: br label %for.exit2 +; EPILOG: for.exit2.loopexit2: +; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] +; EPILOG-NEXT: br label %for.exit2 +; EPILOG: for.exit2: +; EPILOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ] +; EPILOG-NEXT: br i1 %cond, label %exit_true, label %exit_false +; EPILOG: latch.7: +; EPILOG: %niter.nsub.7 = add i64 %niter, -8 + +; PROLOG: test6( +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], +; PROLOG-NEXT: br label %for.exit2 +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ] +; PROLOG-NEXT: br label %for.exit2 +; PROLOG: for.exit2: +; PROLOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ] +; PROLOG-NEXT: br i1 %cond, label %exit_true, label %exit_false +; PROLOG: latch.7: +; PROLOG: %indvars.iv.next.7 = add i64 %indvars.iv, 8 entry: br label %header @@ -277,3 +394,87 @@ exit_true: exit_false: ret i32 %addx } + +; test when value in exit block does not have VMap. +define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { +; EPILOG-NO-IC: test7( +; EPILOG-NO-IC: loopexit1.loopexit: +; EPILOG-NO-IC-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ] +; EPILOG-NO-IC-NEXT: br label %loopexit1 +; EPILOG-NO-IC: loopexit1.loopexit1: +; EPILOG-NO-IC-NEXT: %sext3.ph2 = phi i32 [ %shft, %header.epil ] +; EPILOG-NO-IC-NEXT: br label %loopexit1 +; EPILOG-NO-IC: loopexit1: +; EPILOG-NO-IC-NEXT: %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ] +bb: + %tmp = icmp slt i32 undef, 2 + %sext = sext i32 undef to i64 + %shft = ashr exact i32 %arg, 16 + br i1 %tmp, label %loopexit2, label %preheader + +preheader: ; preds = %bb2 + br label %header + +header: ; preds = %latch, %preheader + %tmp6 = phi i64 [ 1, %preheader ], [ %add, %latch ] + br i1 false, label %loopexit1, label %latch + +latch: ; preds = %header + %add = add nuw nsw i64 %tmp6, 1 + %tmp9 = icmp slt i64 %add, %sext + br i1 %tmp9, label %header, label %latchexit + +latchexit: ; preds = %latch + unreachable + +loopexit2: ; preds = %bb2 + ret i32 %shft + +loopexit1: ; preds = %header + %sext3 = phi i32 [ %shft, %header ] + ret i32 %sext3 +} + +; Nested loop and inner loop is unrolled +; FIXME: we cannot unroll with epilog remainder currently, because +; the outer loop does not contain the epilog preheader and epilog exit (while +; infact it should). This causes us to choke up on LCSSA form being incorrect in +; outer loop. However, the exit block where LCSSA fails, is infact still within +; the outer loop. For now, we just bail out in presence of outer loop and epilog +; loop is generated. +; The outer loop header is the preheader for the inner loop and the inner header +; branches back to the outer loop. +define void @test8() { +; EPILOG: test8( +; EPILOG-NOT: niter + +; PROLOG: test8( +; PROLOG: outerloop: +; PROLOG-NEXT: phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit ] +; PROLOG: %lcmp.mod = icmp eq i64 +; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader +; PROLOG: latch.6: +; PROLOG-NEXT: %tmp4.7 = add nsw i64 %tmp3, 8 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 +; PROLOG: latch.7 +; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100 +; PROLOG-NEXT: br i1 %tmp6.7, label %innerH, label %exit.unr-lcssa +bb: + br label %outerloop + +outerloop: ; preds = %innerH, %bb + %tmp = phi i64 [ 3, %bb ], [ 0, %innerH ] + br label %innerH + +innerH: ; preds = %latch, %outerloop + %tmp3 = phi i64 [ %tmp4, %latch ], [ %tmp, %outerloop ] + %tmp4 = add nuw nsw i64 %tmp3, 1 + br i1 false, label %outerloop, label %latch + +latch: ; preds = %innerH + %tmp6 = icmp ult i64 %tmp4, 100 + br i1 %tmp6, label %innerH, label %exit + +exit: ; preds = %latch + ret void +} diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index 04661314eb1d..878f4e8c78f0 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -170,6 +170,74 @@ for.end: ; preds = %for.cond.for.end_cr ret i16 %res.0.lcssa } +; dont unroll loop with multiple exit/exiting blocks, unless +; -runtime-unroll-multi-exit=true +; single exit, multiple exiting blocks. +define void @unique_exit(i32 %arg) { +; PROLOG: unique_exit( +; PROLOG-NOT: .unr + +; EPILOG: unique_exit( +; EPILOG-NOT: .unr +entry: + %tmp = icmp sgt i32 undef, %arg + br i1 %tmp, label %preheader, label %returnblock + +preheader: ; preds = %entry + br label %header + +LoopExit: ; preds = %header, %latch + %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ] + br label %returnblock + +returnblock: ; preds = %LoopExit, %entry + %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ] + ret void + +header: ; preds = %preheader, %latch + %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ] + %inc = add nsw i32 %tmp4, 1 + br i1 true, label %LoopExit, label %latch + +latch: ; preds = %header + %cmp = icmp slt i32 %inc, undef + br i1 %cmp, label %header, label %LoopExit +} + +; multiple exit blocks. don't unroll +define void @multi_exit(i64 %trip, i1 %cond) { +; PROLOG: multi_exit( +; PROLOG-NOT: .unr + +; EPILOG: multi_exit( +; EPILOG-NOT: .unr +entry: + br label %loop_header + +loop_header: + %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ] + br i1 %cond, label %loop_latch, label %loop_exiting_bb1 + +loop_exiting_bb1: + br i1 false, label %loop_exiting_bb2, label %exit1 + +loop_exiting_bb2: + br i1 false, label %loop_latch, label %exit3 + +exit3: + ret void + +loop_latch: + %iv_next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv_next, %trip + br i1 %cmp, label %loop_header, label %exit2.loopexit + +exit1: + ret void + +exit2.loopexit: + ret void +} !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.unroll.runtime.disable"} diff --git a/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll b/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll new file mode 100644 index 000000000000..cd3e89ae7350 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm -debug 2>&1 | FileCheck -check-prefix=MSG %s +; REQUIRES: asserts +; This test should not be vectorized in X86\SLM arch +; Vectorizing the 64bit multiply in this case is wrong since +; it can be done with a lower bit mode (notice that the sources is 16bit) +; Also addq\subq (quad word) has a high cost on SLM arch. +; this test has a bad performance (regression of -70%) if vectorized on SLM arch +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @no_vec(i32 %LastIndex, i16* nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) { +entry: +; MSG: LV: Selecting VF: 1. + %cmp17 = icmp sgt i32 %LastIndex, 0 + br i1 %cmp17, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %conv5 = sext i16 %Scale to i64 + %sh_prom = and i64 %conv5, 4294967295 + %0 = sext i16 %lag to i64 + %wide.trip.count = zext i32 %LastIndex to i64 + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + %conv8 = trunc i64 %add7 to i32 + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %Accumulator.0.lcssa = phi i32 [ 0, %entry ], [ %conv8, %for.cond.cleanup.loopexit ] + ret i32 %Accumulator.0.lcssa + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %Accumulator.018 = phi i64 [ 0, %for.body.lr.ph ], [ %add7, %for.body ] + %arrayidx = getelementptr inbounds i16, i16* %InputData, i64 %indvars.iv + %1 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %1 to i64 + %2 = add nsw i64 %indvars.iv, %0 + %arrayidx3 = getelementptr inbounds i16, i16* %InputData, i64 %2 + %3 = load i16, i16* %arrayidx3, align 2 + %conv4 = sext i16 %3 to i64 + %mul = mul nsw i64 %conv4, %conv + %shr = ashr i64 %mul, %sh_prom + %add7 = add i64 %shr, %Accumulator.018 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll index 3a581ebf847e..7f381ae6ad7b 100644 --- a/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -1,18 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -;CHECK-LABEL: @foo( -;CHECK: icmp sgt -;CHECK: icmp sgt -;CHECK: icmp slt -;CHECK: select <4 x i1> -;CHECK: %[[P1:.*]] = select <4 x i1> -;CHECK: xor <4 x i1> -;CHECK: and <4 x i1> -;CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %[[P1]] -;CHECK: ret define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP26]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]] +; CHECK: min.iters.checked: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[N]], 3 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0 +; CHECK-NEXT: br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]] +; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !3 +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19> +; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5> +; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i1> [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9> +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i1> [[TMP11]], [[TMP16]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP18]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]] +; CHECK: if.then: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP20]], 19 +; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP21]], 4 +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5 +; CHECK-NEXT: br label [[IF_END14]] +; CHECK: if.end14: +; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 9, [[FOR_BODY]] ], [ 3, [[IF_THEN]] ], [ [[DOT]], [[IF_ELSE]] ] +; CHECK-NEXT: store i32 [[X_0]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !8 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 undef +; entry: %cmp26 = icmp sgt i32 %n, 0 br i1 %cmp26, label %for.body, label %for.end @@ -46,3 +120,4 @@ if.end14: for.end: ret i32 undef } + diff --git a/test/Transforms/LoopVectorize/pr33706.ll b/test/Transforms/LoopVectorize/pr33706.ll new file mode 100644 index 000000000000..b9d0d8a44acc --- /dev/null +++ b/test/Transforms/LoopVectorize/pr33706.ll @@ -0,0 +1,61 @@ +; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s + +@global = local_unnamed_addr global i32 0, align 4 +@global.1 = local_unnamed_addr global i32 0, align 4 +@global.2 = local_unnamed_addr global float 0x3EF0000000000000, align 4 + +; CHECK-LABEL: @PR33706 +; CHECK-NOT: <2 x i32> +define void @PR33706(float* nocapture readonly %arg, float* nocapture %arg1, i32 %arg2) local_unnamed_addr { +bb: + %tmp = load i32, i32* @global.1, align 4 + %tmp3 = getelementptr inbounds float, float* %arg, i64 190 + %tmp4 = getelementptr inbounds float, float* %arg1, i64 512 + %tmp5 = and i32 %tmp, 65535 + %tmp6 = icmp ugt i32 %arg2, 65536 + br i1 %tmp6, label %bb7, label %bb9 + +bb7: ; preds = %bb + %tmp8 = load i32, i32* @global, align 4 + br label %bb27 + +bb9: ; preds = %bb + %tmp10 = udiv i32 65536, %arg2 + br label %bb11 + +bb11: ; preds = %bb11, %bb9 + %tmp12 = phi i32 [ %tmp20, %bb11 ], [ %tmp5, %bb9 ] + %tmp13 = phi float* [ %tmp18, %bb11 ], [ %tmp4, %bb9 ] + %tmp14 = phi i32 [ %tmp16, %bb11 ], [ %tmp10, %bb9 ] + %tmp15 = phi i32 [ %tmp19, %bb11 ], [ %tmp, %bb9 ] + %tmp16 = add nsw i32 %tmp14, -1 + %tmp17 = sitofp i32 %tmp12 to float + store float %tmp17, float* %tmp13, align 4 + %tmp18 = getelementptr inbounds float, float* %tmp13, i64 1 + %tmp19 = add i32 %tmp15, %arg2 + %tmp20 = and i32 %tmp19, 65535 + %tmp21 = icmp eq i32 %tmp16, 0 + br i1 %tmp21, label %bb22, label %bb11 + +bb22: ; preds = %bb11 + %tmp23 = phi float* [ %tmp18, %bb11 ] + %tmp24 = phi i32 [ %tmp19, %bb11 ] + %tmp25 = phi i32 [ %tmp20, %bb11 ] + %tmp26 = ashr i32 %tmp24, 16 + store i32 %tmp26, i32* @global, align 4 + br label %bb27 + +bb27: ; preds = %bb22, %bb7 + %tmp28 = phi i32 [ %tmp26, %bb22 ], [ %tmp8, %bb7 ] + %tmp29 = phi float* [ %tmp23, %bb22 ], [ %tmp4, %bb7 ] + %tmp30 = phi i32 [ %tmp25, %bb22 ], [ %tmp5, %bb7 ] + %tmp31 = sext i32 %tmp28 to i64 + %tmp32 = getelementptr inbounds float, float* %tmp3, i64 %tmp31 + %tmp33 = load float, float* %tmp32, align 4 + %tmp34 = sitofp i32 %tmp30 to float + %tmp35 = load float, float* @global.2, align 4 + %tmp36 = fmul float %tmp35, %tmp34 + %tmp37 = fadd float %tmp33, %tmp36 + store float %tmp37, float* %tmp29, align 4 + ret void +} diff --git a/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml index 17b634acd0e1..558aa9aa73f2 100644 --- a/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml +++ b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml @@ -16,4 +16,5 @@ CfiFunctionDefs: CfiFunctionDecls: - external - external_weak + - local_decl ... diff --git a/test/Transforms/LowerTypeTests/import-icall.ll b/test/Transforms/LowerTypeTests/import-icall.ll index ddeb7fb5c9a2..b4e374720321 100644 --- a/test/Transforms/LowerTypeTests/import-icall.ll +++ b/test/Transforms/LowerTypeTests/import-icall.ll @@ -19,6 +19,10 @@ define i8 @use_b() { ret i8 %x } +define void @local_decl() { + call void @local_decl() + ret void +} declare void @external() declare extern_weak void @external_weak() @@ -33,6 +37,9 @@ declare extern_weak void @external_weak() ; CHECK: define internal i8 @local_b() { ; CHECK-NEXT: call i8 @local_a() +; CHECK: define void @local_decl() +; CHECK-NEXT: call void @local_decl() + ; CHECK: declare void @external() ; CHECK: declare extern_weak void @external_weak() ; CHECK: declare i8 @local_a() diff --git a/test/Transforms/NewGVN/pr33720.ll b/test/Transforms/NewGVN/pr33720.ll new file mode 100644 index 000000000000..3b6c190a4494 --- /dev/null +++ b/test/Transforms/NewGVN/pr33720.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -newgvn -S %s | FileCheck %s + +@f = external local_unnamed_addr global i64 +@b = external local_unnamed_addr global i64 +@e = external local_unnamed_addr global i64 + +define void @patatino() { +; CHECK-LABEL: @patatino( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[IF_END24:%.*]], label [[FOR_COND16:%.*]] +; CHECK: for.cond2thread-pre-split: +; CHECK-NEXT: br i1 false, label [[FOR_BODY:%.*]], label [[FOR_COND8_PREHEADER:%.*]] +; CHECK: for.cond8.preheader: +; CHECK-NEXT: br i1 undef, label [[L1:%.*]], label %for.cond11thread-pre-split.lr.ph +; CHECK: for.cond11thread-pre-split.lr.ph: +; CHECK-NEXT: br label [[L1]] +; CHECK: for.body: +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i64 [[K_2:%.*]], 3 +; CHECK-NEXT: [[CONV4:%.*]] = zext i1 [[CMP3]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @f +; CHECK-NEXT: [[OR:%.*]] = or i64 [[TMP0]], [[CONV4]] +; CHECK-NEXT: store i64 [[OR]], i64* @f +; CHECK-NEXT: [[TOBOOL7:%.*]] = icmp ne i64 [[K_2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL7]], label %for.cond2thread-pre-split, label [[LOR_RHS:%.*]] +; CHECK: lor.rhs: +; CHECK-NEXT: store i64 1, i64* @b, align 8 +; CHECK-NEXT: br label %for.cond2thread-pre-split +; CHECK: l1: +; CHECK-NEXT: [[K_2]] = phi i64 [ undef, [[L1_PREHEADER:%.*]] ], [ 15, [[FOR_COND8_PREHEADER]] ], [ 5, %for.cond11thread-pre-split.lr.ph ] +; CHECK-NEXT: store i64 7, i64* [[J_3:%.*]] +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: for.cond16: +; CHECK-NEXT: [[J_0:%.*]] = phi i64* [ @f, [[ENTRY:%.*]] ], [ undef, [[FOR_COND20:%.*]] ], [ @e, [[FOR_COND16]] ] +; CHECK-NEXT: br i1 undef, label [[FOR_COND20]], label [[FOR_COND16]] +; CHECK: for.cond20: +; CHECK-NEXT: [[J_2:%.*]] = phi i64* [ [[J_0]], [[FOR_COND16]] ], [ undef, [[IF_END24]] ] +; CHECK-NEXT: br i1 true, label [[IF_END24]], label [[FOR_COND16]] +; CHECK: if.end24: +; CHECK-NEXT: [[J_3]] = phi i64* [ [[J_2]], [[FOR_COND20]] ], [ undef, [[ENTRY]] ] +; CHECK-NEXT: br i1 false, label [[FOR_COND20]], label [[L1_PREHEADER]] +; CHECK: l1.preheader: +; CHECK-NEXT: br label [[L1]] +; +entry: + br i1 undef, label %if.end24, label %for.cond16 + +for.cond2thread-pre-split: + br i1 false, label %for.body, label %for.cond8.preheader + +for.cond8.preheader: + br i1 undef, label %l1, label %for.cond11thread-pre-split.lr.ph + +for.cond11thread-pre-split.lr.ph: + br label %l1 + +for.body: + %k.031 = phi i64 [ %k.2, %l1 ], [ 15, %for.cond2thread-pre-split ] + %cmp3 = icmp ne i64 %k.031, 3 + %conv4 = zext i1 %cmp3 to i64 + %0 = load i64, i64* @f + %or = or i64 %0, %conv4 + store i64 %or, i64* @f + %tobool7 = icmp ne i64 %k.031, 0 + %or.cond = or i1 %tobool7, false + br i1 %or.cond, label %for.cond2thread-pre-split, label %lor.rhs + +lor.rhs: + store i64 1, i64* @b, align 8 + br label %for.cond2thread-pre-split + +l1: + %k.2 = phi i64 [ undef, %l1.preheader ], [ 15, %for.cond8.preheader ], [ 5, %for.cond11thread-pre-split.lr.ph ] + store i64 7, i64* %j.3 + br label %for.body + +for.cond16: + %j.0 = phi i64* [ @f, %entry ], [ %j.2, %for.cond20 ], [ @e, %for.cond16 ] + br i1 undef, label %for.cond20, label %for.cond16 + +for.cond20: + %j.2 = phi i64* [ %j.0, %for.cond16 ], [ %j.3, %if.end24 ] + br i1 true, label %if.end24, label %for.cond16 + +if.end24: + %j.3 = phi i64* [ %j.2, %for.cond20 ], [ undef, %entry ] + br i1 false, label %for.cond20, label %l1.preheader + +l1.preheader: + br label %l1 +} diff --git a/test/Transforms/PGOProfile/counter_promo_exit_merge.ll b/test/Transforms/PGOProfile/counter_promo_exit_merge.ll index f53d37600ce6..85ca1613c8ad 100644 --- a/test/Transforms/PGOProfile/counter_promo_exit_merge.ll +++ b/test/Transforms/PGOProfile/counter_promo_exit_merge.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s -; RUN: opt < %s --passes=instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s +; RUN: opt < %s -instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s +; RUN: opt < %s --passes=instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s $__llvm_profile_raw_version = comdat any diff --git a/test/Transforms/PGOProfile/counter_promo_mexits.ll b/test/Transforms/PGOProfile/counter_promo_mexits.ll index 71e5f066d50f..bb799757a47c 100644 --- a/test/Transforms/PGOProfile/counter_promo_mexits.ll +++ b/test/Transforms/PGOProfile/counter_promo_mexits.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s -; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s +; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s +; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s @g = common local_unnamed_addr global i32 0, align 4 diff --git a/test/Transforms/PGOProfile/counter_promo_nest.ll b/test/Transforms/PGOProfile/counter_promo_nest.ll new file mode 100644 index 000000000000..b7f117b3e949 --- /dev/null +++ b/test/Transforms/PGOProfile/counter_promo_nest.ll @@ -0,0 +1,165 @@ +; TEST that counter updates are promoted outside the whole loop nest +; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO %s +; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO %s + +@g = common local_unnamed_addr global i32 0, align 4 +@c = local_unnamed_addr global i32 10, align 4 + +; Function Attrs: noinline norecurse nounwind uwtable +define void @bar() local_unnamed_addr #0 { +bb: + %tmp2 = load i32, i32* @g, align 4, !tbaa !2 + %tmp3 = add nsw i32 %tmp2, 1 + store i32 %tmp3, i32* @g, align 4, !tbaa !2 + ret void +} + +; Function Attrs: norecurse nounwind uwtable +define i32 @main() local_unnamed_addr #1 { +bb: + store i32 0, i32* @g, align 4, !tbaa !2 + %tmp = load i32, i32* @c, align 4, !tbaa !2 + %tmp1 = icmp sgt i32 %tmp, 0 + br i1 %tmp1, label %bb2_1, label %bb84 + +bb2_1: + br label %bb2 + +bb2: ; preds = %bb39, %bb + %tmp3 = phi i32 [ %tmp40, %bb39 ], [ %tmp, %bb2_1 ] + %tmp5 = phi i32 [ %tmp43, %bb39 ], [ 0, %bb2_1 ] + %tmp7 = icmp sgt i32 %tmp3, 0 + br i1 %tmp7, label %bb14_1, label %bb39 + +bb8: ; preds = %bb39 +; PROMO-LABEL: bb8 +; PROMO: load {{.*}} @__profc_main{{.*}} +; PROMO-NEXT: add +; PROMO-NEXT: store {{.*}}@__profc_main{{.*}} +; PROMO-NEXT: load {{.*}} @__profc_main{{.*}} +; PROMO-NEXT: add +; PROMO-NEXT: store {{.*}}@__profc_main{{.*}} +; PROMO-NEXT: load {{.*}} @__profc_main{{.*}} +; PROMO-NEXT: add +; PROMO-NEXT: store {{.*}}@__profc_main{{.*}} +; PROMO-NEXT: load {{.*}} @__profc_main{{.*}} +; PROMO-NEXT: add +; PROMO-NEXT: store {{.*}}@__profc_main{{.*}} +; PROMO-NEXT: load {{.*}} @__profc_main{{.*}} +; PROMO-NEXT: add +; PROMO-NEXT: store {{.*}}@__profc_main{{.*}} + + %tmp13 = icmp sgt i32 %tmp40, 0 + br i1 %tmp13, label %bb45, label %bb84 + +bb14_1: + br label %bb14 + +bb14: ; preds = %bb29, %bb2 + %tmp15 = phi i32 [ %tmp30, %bb29 ], [ %tmp3, %bb14_1 ] + %tmp16 = phi i64 [ %tmp31, %bb29 ], [ 0, %bb14_1 ] + %tmp17 = phi i64 [ %tmp32, %bb29 ], [ 0, %bb14_1 ] + %tmp18 = phi i32 [ %tmp33, %bb29 ], [ 0, %bb14_1 ] + %tmp19 = icmp sgt i32 %tmp15, 0 + br i1 %tmp19, label %bb20_split, label %bb29 + +bb20_split: + br label %bb20 + +bb20: ; preds = %bb20, %bb14 + %tmp21 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb20_split ] + %tmp22 = phi i32 [ %tmp24, %bb20 ], [ 0, %bb20_split ] + %tmp23 = add nuw i64 %tmp21, 1 + tail call void @bar() + %tmp24 = add nuw nsw i32 %tmp22, 1 + %tmp25 = load i32, i32* @c, align 4, !tbaa !2 + %tmp26 = icmp slt i32 %tmp24, %tmp25 + br i1 %tmp26, label %bb20, label %bb27 + +bb27: ; preds = %bb20 + %tmp28 = add i64 %tmp23, %tmp16 + br label %bb29 + +bb29: ; preds = %bb27, %bb14 + %tmp30 = phi i32 [ %tmp25, %bb27 ], [ %tmp15, %bb14 ] + %tmp31 = phi i64 [ %tmp28, %bb27 ], [ %tmp16, %bb14 ] + %tmp32 = add nuw i64 %tmp17, 1 + %tmp33 = add nuw nsw i32 %tmp18, 1 + %tmp34 = icmp slt i32 %tmp33, %tmp30 + br i1 %tmp34, label %bb14, label %bb35 + +bb35: ; preds = %bb29 + %tmp36 = insertelement <2 x i64> undef, i64 %tmp31, i32 0 + br label %bb39 + +bb39: ; preds = %bb35, %bb2 + %tmp40 = phi i32 [ %tmp30, %bb35 ], [ %tmp3, %bb2 ] + %tmp43 = add nuw nsw i32 %tmp5, 1 + %tmp44 = icmp slt i32 %tmp43, %tmp40 + br i1 %tmp44, label %bb2, label %bb8 + +bb45: ; preds = %bb67, %bb8 + %tmp46 = phi i32 [ %tmp68, %bb67 ], [ %tmp40, %bb8 ] + %tmp47 = phi i64 [ %tmp69, %bb67 ], [ 0, %bb8 ] + %tmp48 = phi i64 [ %tmp70, %bb67 ], [ 0, %bb8 ] + %tmp49 = phi i32 [ %tmp71, %bb67 ], [ 0, %bb8 ] + %tmp50 = icmp sgt i32 %tmp46, 0 + br i1 %tmp50, label %bb57, label %bb67 + +bb51: ; preds = %bb67 + %tmp56 = icmp sgt i32 %tmp68, 0 + br i1 %tmp56, label %bb73, label %bb84 + +bb57: ; preds = %bb57, %bb45 + %tmp58 = phi i64 [ %tmp60, %bb57 ], [ 0, %bb45 ] + %tmp59 = phi i32 [ %tmp61, %bb57 ], [ 0, %bb45 ] + %tmp60 = add nuw i64 %tmp58, 1 + tail call void @bar() + %tmp61 = add nuw nsw i32 %tmp59, 1 + %tmp62 = load i32, i32* @c, align 4, !tbaa !2 + %tmp63 = mul nsw i32 %tmp62, 10 + %tmp64 = icmp slt i32 %tmp61, %tmp63 + br i1 %tmp64, label %bb57, label %bb65 + +bb65: ; preds = %bb57 + %tmp66 = add i64 %tmp60, %tmp47 + br label %bb67 + +bb67: ; preds = %bb65, %bb45 + %tmp68 = phi i32 [ %tmp62, %bb65 ], [ %tmp46, %bb45 ] + %tmp69 = phi i64 [ %tmp66, %bb65 ], [ %tmp47, %bb45 ] + %tmp70 = add nuw i64 %tmp48, 1 + %tmp71 = add nuw nsw i32 %tmp49, 1 + %tmp72 = icmp slt i32 %tmp71, %tmp68 + br i1 %tmp72, label %bb45, label %bb51 + +bb73: ; preds = %bb73, %bb51 + %tmp74 = phi i64 [ %tmp76, %bb73 ], [ 0, %bb51 ] + %tmp75 = phi i32 [ %tmp77, %bb73 ], [ 0, %bb51 ] + %tmp76 = add nuw i64 %tmp74, 1 + tail call void @bar() + %tmp77 = add nuw nsw i32 %tmp75, 1 + %tmp78 = load i32, i32* @c, align 4, !tbaa !2 + %tmp79 = mul nsw i32 %tmp78, 100 + %tmp80 = icmp slt i32 %tmp77, %tmp79 + br i1 %tmp80, label %bb73, label %bb81 + +bb81: ; preds = %bb73 + br label %bb84 + +bb84: ; preds = %bb81, %bb51, %bb8, %bb + ret i32 0 +} + +attributes #0 = { noinline } +attributes #1 = { norecurse nounwind uwtable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 307355)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git a/test/Transforms/SimplifyCFG/implied-and-or.ll b/test/Transforms/SimplifyCFG/implied-and-or.ll new file mode 100644 index 000000000000..e615f302feef --- /dev/null +++ b/test/Transforms/SimplifyCFG/implied-and-or.ll @@ -0,0 +1,183 @@ +; RUN: opt %s -S -simplifycfg | FileCheck %s + +declare void @foo() +declare void @bar() + + +; CHECK-LABEL: @test_and1 +; CHECK: taken: +; CHECK-NOT: cmp3 +; CHECK: call void @bar() +; CHECK-NEXT: call void @foo() +; CHECK: ret +define void @test_and1(i32 %a, i32 %b) { +entry: + %cmp1 = icmp eq i32 %a, 0 + %cmp2 = icmp eq i32 %b, 0 + %and = and i1 %cmp1, %cmp2 + br i1 %and, label %taken, label %end + +taken: + call void @bar() + %cmp3 = icmp eq i32 %a, 0 ;; <-- implied true + br i1 %cmp3, label %if.then, label %end + +if.then: + call void @foo() + br label %end + +end: + ret void +} + +; We can't infer anything if the result of the 'and' is false +; CHECK-LABEL: @test_and2 +; CHECK: taken: +; CHECK: call void @bar() +; CHECK: %cmp3 +; CHECK: br i1 %cmp3 +; CHECK: if.then: +; CHECK: call void @foo() +; CHECK: ret +define void @test_and2(i32 %a, i32 %b) { +entry: + %cmp1 = icmp eq i32 %a, 0 + %cmp2 = icmp eq i32 %b, 0 + %and = and i1 %cmp1, %cmp2 + br i1 %and, label %end, label %taken + +taken: + call void @bar() + %cmp3 = icmp eq i32 %a, 0 + br i1 %cmp3, label %if.then, label %end + +if.then: + call void @foo() + br label %end + +end: + ret void +} + +; CHECK-LABEL: @test_or1 +; CHECK: taken: +; CHECK-NOT: cmp3 +; CHECK: call void @bar() +; CHECK-NEXT: call void @foo() +; CHECK: ret +define void @test_or1(i32 %a, i32 %b) { +entry: + %cmp1 = icmp eq i32 %a, 0 + %cmp2 = icmp eq i32 %b, 0 + %or = or i1 %cmp1, %cmp2 + br i1 %or, label %end, label %taken + +taken: + call void @bar() + %cmp3 = icmp ne i32 %a, 0 ;; <-- implied true + br i1 %cmp3, label %if.then, label %end + +if.then: + call void @foo() + br label %end + +end: + ret void +} + +; We can't infer anything if the result of the 'or' is true +; CHECK-LABEL: @test_or2 +; CHECK: call void @bar() +; CHECK: %cmp3 +; CHECK: br i1 %cmp3 +; CHECK: if.then: +; CHECK: call void @foo() +; CHECK: ret +define void @test_or2(i32 %a, i32 %b) { +entry: + %cmp1 = icmp eq i32 %a, 0 + %cmp2 = icmp eq i32 %b, 0 + %or = or i1 %cmp1, %cmp2 + br i1 %or, label %taken, label %end + +taken: + call void @bar() + %cmp3 = icmp eq i32 %a, 0 + br i1 %cmp3, label %if.then, label %end + +if.then: + call void @foo() + br label %end + +end: + ret void +} + +; We can recurse a tree of 'and' or 'or's. +; CHECK-LABEL: @test_and_recurse1 +; CHECK: taken: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label %end +; CHECK: ret +define void @test_and_recurse1(i32 %a, i32 %b, i32 %c) { +entry: + %cmpa = icmp eq i32 %a, 0 + %cmpb = icmp eq i32 %b, 0 + %cmpc = icmp eq i32 %c, 0 + %and1 = and i1 %cmpa, %cmpb + %and2 = and i1 %and1, %cmpc + br i1 %and2, label %taken, label %end + +taken: + call void @bar() + %cmp3 = icmp eq i32 %a, 0 + br i1 %cmp3, label %if.then, label %end + +if.then: + call void @foo() + br label %end + +end: + ret void +} + +; Check to make sure we don't recurse too deep. +; CHECK-LABEL: @test_and_recurse2 +; CHECK: taken: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: %cmp3 = icmp eq i32 %a, 0 +; CHECK-NEXT: br i1 %cmp3, label %if.then, label %end +; CHECK: ret +define void @test_and_recurse2(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h) { +entry: + %cmpa = icmp eq i32 %a, 0 + %cmpb = icmp eq i32 %b, 0 + %cmpc = icmp eq i32 %c, 0 + %cmpd = icmp eq i32 %d, 0 + %cmpe = icmp eq i32 %e, 0 + %cmpf = icmp eq i32 %f, 0 + %cmpg = icmp eq i32 %g, 0 + %cmph = icmp eq i32 %h, 0 + %and1 = and i1 %cmpa, %cmpb + %and2 = and i1 %and1, %cmpc + %and3 = and i1 %and2, %cmpd + %and4 = and i1 %and3, %cmpe + %and5 = and i1 %and4, %cmpf + %and6 = and i1 %and5, %cmpg + %and7 = and i1 %and6, %cmph + br i1 %and7, label %taken, label %end + +taken: + call void @bar() + %cmp3 = icmp eq i32 %a, 0 ; <-- can be implied true + br i1 %cmp3, label %if.then, label %end + +if.then: + call void @foo() + br label %end + +end: + ret void +} diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll index 0f7bfa8516c9..513da477607b 100644 --- a/test/Transforms/SimplifyCFG/sink-common-code.ll +++ b/test/Transforms/SimplifyCFG/sink-common-code.ll @@ -818,6 +818,30 @@ merge: ; CHECK: right: ; CHECK-NEXT: %val1 = call i32 @call_target() [ "deopt"(i32 20) ] +%T = type {i32, i32} + +define i32 @test_insertvalue(i1 zeroext %flag, %T %P) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %t1 = insertvalue %T %P, i32 0, 0 + br label %if.end + +if.else: + %t2 = insertvalue %T %P, i32 1, 0 + br label %if.end + +if.end: + %t = phi %T [%t1, %if.then], [%t2, %if.else] + ret i32 1 +} + +; CHECK-LABEL: @test_insertvalue +; CHECK: select +; CHECK: insertvalue +; CHECK-NOT: insertvalue + ; CHECK: ![[TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0} ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]} ; CHECK: ![[TEXT]] = !{!"an example type tree"} diff --git a/test/Transforms/Sink/fence.ll b/test/Transforms/Sink/fence.ll index aa237d8192b6..09aa565d88f8 100644 --- a/test/Transforms/Sink/fence.ll +++ b/test/Transforms/Sink/fence.ll @@ -5,9 +5,9 @@ target triple = "x86_64-unknown-linux-gnu" define void @test1(i32* ()*) { entry: %1 = call i32* %0() #0 - fence singlethread seq_cst + fence syncscope("singlethread") seq_cst %2 = load i32, i32* %1, align 4 - fence singlethread seq_cst + fence syncscope("singlethread") seq_cst %3 = icmp eq i32 %2, 0 br i1 %3, label %fail, label %pass @@ -20,9 +20,9 @@ pass: ; preds = %fail, %top ; CHECK-LABEL: @test1( ; CHECK: %[[call:.*]] = call i32* %0() -; CHECK: fence singlethread seq_cst +; CHECK: fence syncscope("singlethread") seq_cst ; CHECK: load i32, i32* %[[call]], align 4 -; CHECK: fence singlethread seq_cst +; CHECK: fence syncscope("singlethread") seq_cst attributes #0 = { nounwind readnone } diff --git a/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll b/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll new file mode 100644 index 000000000000..661d0739401a --- /dev/null +++ b/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll @@ -0,0 +1,37 @@ +; Test for a bug specific to the new pass manager where we may build a domtree +; to make more precise AA queries for functions. +; +; RUN: opt -aa-pipeline=default -passes='no-op-module' -debug-pass-manager -thinlto-bc -o %t %s +; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s +; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s + +target triple = "x86_64-unknown-linux-gnu" + +%struct.hoge = type { %struct.widget } +%struct.widget = type { i32 (...)** } + +; M0: @global = local_unnamed_addr global +; M1-NOT: @global +@global = local_unnamed_addr global %struct.hoge { %struct.widget { i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global.1, i32 0, inrange i32 0, i32 2) to i32 (...)**) } }, align 8 + +; M0: @global.1 = external unnamed_addr constant +; M1: @global.1 = linkonce_odr unnamed_addr constant +@global.1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @global.4 to i8*), i8* bitcast (i32 (%struct.widget*)* @quux to i8*)] }, align 8, !type !0 + +; M0: @global.2 = external global +; M1-NOT: @global.2 +@global.2 = external global i8* + +; M0: @global.3 = linkonce_odr constant +; M1-NOT: @global.3 +@global.3 = linkonce_odr constant [22 x i8] c"zzzzzzzzzzzzzzzzzzzzz\00" + +; M0: @global.4 = linkonce_odr constant +; M1: @global.4 = external constant +@global.4 = linkonce_odr constant { i8*, i8* }{ i8* bitcast (i8** getelementptr inbounds (i8*, i8** @global.2, i64 2) to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @global.3, i32 0, i32 0) } + +@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer + +declare i32 @quux(%struct.widget*) unnamed_addr + +!0 = !{i64 16, !"yyyyyyyyyyyyyyyyyyyyyyyyy"} |