From 6b3f41ed88e8e440e11a4fbf20b6600529f80049 Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Tue, 16 May 2017 19:46:52 +0000
Subject: Vendor import of llvm trunk r303197:
 https://llvm.org/svn/llvm-project/llvm/trunk@303197

---
 .../CodeExtractor/ExtractedFnEntryCount.ll         |    2 +-
 .../CodeExtractor/MultipleExitBranchProb.ll        |    2 +-
 test/Transforms/CodeExtractor/PartialInlineAnd.ll  |    4 +-
 .../CodeExtractor/PartialInlineEntryUpdate.ll      |   41 +
 .../CodeExtractor/PartialInlineHighCost.ll         |  107 +
 test/Transforms/CodeExtractor/PartialInlineOr.ll   |    4 +-
 .../Transforms/CodeExtractor/PartialInlineOrAnd.ll |    4 +-
 test/Transforms/CodeExtractor/SingleCondition.ll   |    4 +-
 .../CodeExtractor/X86/InheritTargetAttributes.ll   |    4 +-
 .../Transforms/CodeGenPrepare/section-samplepgo.ll |   57 +
 test/Transforms/CodeGenPrepare/section.ll          |   20 +-
 test/Transforms/ConstProp/calls-math-finite.ll     |   83 +
 test/Transforms/ConstProp/calls.ll                 |  206 --
 test/Transforms/ConstProp/sse.ll                   |  208 ++
 .../Coroutines/coro-eh-aware-edge-split.ll         |  218 ++
 .../GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll |    7 +-
 test/Transforms/GVN/PRE/nonintegral.ll             |   39 +
 .../IndVarSimplify/2011-10-27-lftrnull.ll          |    2 +-
 test/Transforms/InferFunctionAttrs/annotate.ll     |  126 +
 test/Transforms/InferFunctionAttrs/no-proto.ll     |  126 +
 test/Transforms/Inline/inline-cold.ll              |   20 +-
 .../inline-constexpr-addrspacecast-argument.ll     |    2 +-
 test/Transforms/Inline/partial-inline-act.ll       |    2 +-
 test/Transforms/Inline/prof-update.ll              |   35 +-
 .../InstCombine/2012-04-23-Neon-Intrinsics.ll      |  135 -
 .../AArch64/2012-04-23-Neon-Intrinsics.ll          |   71 +
 test/Transforms/InstCombine/AArch64/lit.local.cfg  |    2 +
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll        | 1540 +++++++++
 test/Transforms/InstCombine/AMDGPU/lit.local.cfg   |    2 +
 .../InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll  |   65 +
 .../InstCombine/ARM/constant-fold-hang.ll          |   14 +
 test/Transforms/InstCombine/ARM/lit.local.cfg      |    2 +
 test/Transforms/InstCombine/ARM/neon-intrinsics.ll |   25 +
 .../InstCombine/PowerPC/aligned-altivec.ll         |  131 +
 test/Transforms/InstCombine/PowerPC/aligned-qpx.ll |  165 +
 test/Transforms/InstCombine/PowerPC/lit.local.cfg  |    3 +
 .../InstCombine/PowerPC/vsx-unaligned.ll           |   44 +
 .../InstCombine/X86/X86FsubCmpCombine.ll           |  181 ++
 test/Transforms/InstCombine/X86/blend_x86.ll       |  151 +
 test/Transforms/InstCombine/X86/lit.local.cfg      |    2 +
 test/Transforms/InstCombine/X86/pr2645-1.ll        |   39 +
 .../InstCombine/X86/shufflemask-undef.ll           |  110 +
 test/Transforms/InstCombine/X86/x86-avx2.ll        |  109 +
 test/Transforms/InstCombine/X86/x86-avx512.ll      | 2793 ++++++++++++++++
 .../InstCombine/X86/x86-crc32-demanded.ll          |   17 +
 test/Transforms/InstCombine/X86/x86-f16c.ll        |   68 +
 test/Transforms/InstCombine/X86/x86-fma.ll         |  315 ++
 test/Transforms/InstCombine/X86/x86-insertps.ll    |  166 +
 .../InstCombine/X86/x86-masked-memops.ll           |  328 ++
 test/Transforms/InstCombine/X86/x86-movmsk.ll      |  324 ++
 test/Transforms/InstCombine/X86/x86-muldq.ll       |  245 ++
 test/Transforms/InstCombine/X86/x86-pack.ll        |  366 +++
 test/Transforms/InstCombine/X86/x86-pshufb.ll      |  515 +++
 test/Transforms/InstCombine/X86/x86-sse.ll         |  613 ++++
 test/Transforms/InstCombine/X86/x86-sse2.ll        |  460 +++
 test/Transforms/InstCombine/X86/x86-sse41.ll       |   98 +
 test/Transforms/InstCombine/X86/x86-sse4a.ll       |  408 +++
 .../InstCombine/X86/x86-vec_demanded_elts.ll       |  110 +
 .../InstCombine/X86/x86-vector-shifts.ll           | 3434 ++++++++++++++++++++
 test/Transforms/InstCombine/X86/x86-vperm2.ll      |  313 ++
 test/Transforms/InstCombine/X86/x86-vpermil.ll     |  298 ++
 test/Transforms/InstCombine/X86/x86-xop.ll         |  305 ++
 test/Transforms/InstCombine/X86FsubCmpCombine.ll   |  181 --
 test/Transforms/InstCombine/add.ll                 |   26 +
 test/Transforms/InstCombine/aligned-altivec.ll     |  131 -
 test/Transforms/InstCombine/aligned-qpx.ll         |  165 -
 test/Transforms/InstCombine/amdgcn-intrinsics.ll   | 1540 ---------
 test/Transforms/InstCombine/and.ll                 |    2 +-
 test/Transforms/InstCombine/bit-tracking.ll        |   26 -
 test/Transforms/InstCombine/blend_x86.ll           |  151 -
 test/Transforms/InstCombine/cast.ll                |   38 +
 test/Transforms/InstCombine/constant-fold-hang.ll  |   14 -
 .../InstCombine/constant-fold-iteration.ll         |   10 +
 test/Transforms/InstCombine/demorgan.ll            |    8 +-
 test/Transforms/InstCombine/icmp.ll                |   15 +
 test/Transforms/InstCombine/intrinsics.ll          |   29 +-
 test/Transforms/InstCombine/logical-select.ll      |   75 +
 test/Transforms/InstCombine/neon-intrinsics.ll     |   25 -
 test/Transforms/InstCombine/not.ll                 |   76 +-
 test/Transforms/InstCombine/or-xor.ll              |   70 +
 test/Transforms/InstCombine/or.ll                  |  109 -
 test/Transforms/InstCombine/pr2645-1.ll            |   39 -
 test/Transforms/InstCombine/sext.ll                |    2 +-
 test/Transforms/InstCombine/shufflemask-undef.ll   |  109 -
 test/Transforms/InstCombine/trunc.ll               |    2 +-
 test/Transforms/InstCombine/vec_demanded_elts.ll   |  108 -
 test/Transforms/InstCombine/vsx-unaligned.ll       |   44 -
 test/Transforms/InstCombine/x86-avx2.ll            |  109 -
 test/Transforms/InstCombine/x86-avx512.ll          | 2793 ----------------
 test/Transforms/InstCombine/x86-crc32-demanded.ll  |   17 -
 test/Transforms/InstCombine/x86-f16c.ll            |   68 -
 test/Transforms/InstCombine/x86-fma.ll             |  315 --
 test/Transforms/InstCombine/x86-insertps.ll        |  166 -
 test/Transforms/InstCombine/x86-masked-memops.ll   |  328 --
 test/Transforms/InstCombine/x86-movmsk.ll          |  324 --
 test/Transforms/InstCombine/x86-muldq.ll           |  245 --
 test/Transforms/InstCombine/x86-pack.ll            |  366 ---
 test/Transforms/InstCombine/x86-pshufb.ll          |  515 ---
 test/Transforms/InstCombine/x86-sse.ll             |  613 ----
 test/Transforms/InstCombine/x86-sse2.ll            |  460 ---
 test/Transforms/InstCombine/x86-sse41.ll           |   98 -
 test/Transforms/InstCombine/x86-sse4a.ll           |  408 ---
 test/Transforms/InstCombine/x86-vector-shifts.ll   | 3434 --------------------
 test/Transforms/InstCombine/x86-vperm2.ll          |  313 --
 test/Transforms/InstCombine/x86-vpermil.ll         |  298 --
 test/Transforms/InstCombine/x86-xop.ll             |  305 --
 test/Transforms/InstCombine/xor2.ll                |   11 -
 test/Transforms/InstNamer/basic.ll                 |   19 +
 test/Transforms/InstSimplify/AndOrXor.ll           |  173 +
 test/Transforms/InstSimplify/apint-or.ll           |   72 -
 test/Transforms/InstSimplify/compare.ll            |    7 +-
 test/Transforms/InstSimplify/or.ll                 |  181 ++
 test/Transforms/LoopIdiom/ARM/ctlz.ll              |  185 ++
 test/Transforms/LoopIdiom/X86/ctlz.ll              |  185 ++
 test/Transforms/LoopUnroll/not-rotated.ll          |    2 +-
 .../LoopVectorize/X86/svml-calls-finite.ll         |  187 ++
 test/Transforms/LoopVectorize/induction.ll         |   45 +
 test/Transforms/LoopVectorize/pr32859.ll           |   30 +
 test/Transforms/NewGVN/pr32934.ll                  |   69 +
 test/Transforms/NewGVN/pr32952.ll                  |   42 +
 test/Transforms/NewGVN/verify-memoryphi.ll         |   29 +
 .../SLPVectorizer/AArch64/64-bit-vector.ll         |   22 +
 .../SLPVectorizer/AArch64/getelementptr.ll         |   43 +-
 .../Transforms/SLPVectorizer/AArch64/horizontal.ll |   33 +-
 test/Transforms/SLPVectorizer/AArch64/remarks.ll   |   32 +
 test/Transforms/SLPVectorizer/X86/arith-add.ll     |  649 ++++
 test/Transforms/SLPVectorizer/X86/arith-mul.ll     |  700 ++++
 test/Transforms/SLPVectorizer/X86/arith-sub.ll     |  649 ++++
 test/Transforms/SLPVectorizer/X86/shift-ashr.ll    |  913 ++++++
 test/Transforms/SLPVectorizer/X86/shift-lshr.ll    |  862 +++++
 test/Transforms/SLPVectorizer/X86/shift-shl.ll     |  814 +++++
 .../SimpleLoopUnswitch/trivial-unswitch.ll         |  199 ++
 test/Transforms/SpeculativeExecution/spec-other.ll |   32 -
 .../Transforms/SpeculativeExecution/spec-vector.ll |   73 -
 test/Transforms/Util/split-bit-piece.ll            |  110 +-
 135 files changed, 21366 insertions(+), 14462 deletions(-)
 create mode 100644 test/Transforms/CodeExtractor/PartialInlineEntryUpdate.ll
 create mode 100644 test/Transforms/CodeExtractor/PartialInlineHighCost.ll
 create mode 100644 test/Transforms/CodeGenPrepare/section-samplepgo.ll
 create mode 100644 test/Transforms/ConstProp/calls-math-finite.ll
 create mode 100644 test/Transforms/ConstProp/sse.ll
 create mode 100644 test/Transforms/Coroutines/coro-eh-aware-edge-split.ll
 create mode 100644 test/Transforms/GVN/PRE/nonintegral.ll
 delete mode 100644 test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
 create mode 100644 test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll
 create mode 100644 test/Transforms/InstCombine/AArch64/lit.local.cfg
 create mode 100644 test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
 create mode 100644 test/Transforms/InstCombine/AMDGPU/lit.local.cfg
 create mode 100644 test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
 create mode 100644 test/Transforms/InstCombine/ARM/constant-fold-hang.ll
 create mode 100644 test/Transforms/InstCombine/ARM/lit.local.cfg
 create mode 100644 test/Transforms/InstCombine/ARM/neon-intrinsics.ll
 create mode 100644 test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
 create mode 100644 test/Transforms/InstCombine/PowerPC/aligned-qpx.ll
 create mode 100644 test/Transforms/InstCombine/PowerPC/lit.local.cfg
 create mode 100644 test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
 create mode 100644 test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
 create mode 100644 test/Transforms/InstCombine/X86/blend_x86.ll
 create mode 100644 test/Transforms/InstCombine/X86/lit.local.cfg
 create mode 100644 test/Transforms/InstCombine/X86/pr2645-1.ll
 create mode 100644 test/Transforms/InstCombine/X86/shufflemask-undef.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-avx2.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-avx512.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-crc32-demanded.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-f16c.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-fma.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-insertps.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-masked-memops.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-movmsk.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-muldq.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-pack.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-pshufb.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-sse.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-sse2.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-sse41.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-sse4a.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-vector-shifts.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-vperm2.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-vpermil.ll
 create mode 100644 test/Transforms/InstCombine/X86/x86-xop.ll
 delete mode 100644 test/Transforms/InstCombine/X86FsubCmpCombine.ll
 delete mode 100644 test/Transforms/InstCombine/aligned-altivec.ll
 delete mode 100644 test/Transforms/InstCombine/aligned-qpx.ll
 delete mode 100644 test/Transforms/InstCombine/amdgcn-intrinsics.ll
 delete mode 100644 test/Transforms/InstCombine/bit-tracking.ll
 delete mode 100644 test/Transforms/InstCombine/blend_x86.ll
 delete mode 100644 test/Transforms/InstCombine/constant-fold-hang.ll
 create mode 100644 test/Transforms/InstCombine/constant-fold-iteration.ll
 delete mode 100644 test/Transforms/InstCombine/neon-intrinsics.ll
 delete mode 100644 test/Transforms/InstCombine/pr2645-1.ll
 delete mode 100644 test/Transforms/InstCombine/shufflemask-undef.ll
 delete mode 100644 test/Transforms/InstCombine/vsx-unaligned.ll
 delete mode 100644 test/Transforms/InstCombine/x86-avx2.ll
 delete mode 100644 test/Transforms/InstCombine/x86-avx512.ll
 delete mode 100644 test/Transforms/InstCombine/x86-crc32-demanded.ll
 delete mode 100644 test/Transforms/InstCombine/x86-f16c.ll
 delete mode 100644 test/Transforms/InstCombine/x86-fma.ll
 delete mode 100644 test/Transforms/InstCombine/x86-insertps.ll
 delete mode 100644 test/Transforms/InstCombine/x86-masked-memops.ll
 delete mode 100644 test/Transforms/InstCombine/x86-movmsk.ll
 delete mode 100644 test/Transforms/InstCombine/x86-muldq.ll
 delete mode 100644 test/Transforms/InstCombine/x86-pack.ll
 delete mode 100644 test/Transforms/InstCombine/x86-pshufb.ll
 delete mode 100644 test/Transforms/InstCombine/x86-sse.ll
 delete mode 100644 test/Transforms/InstCombine/x86-sse2.ll
 delete mode 100644 test/Transforms/InstCombine/x86-sse41.ll
 delete mode 100644 test/Transforms/InstCombine/x86-sse4a.ll
 delete mode 100644 test/Transforms/InstCombine/x86-vector-shifts.ll
 delete mode 100644 test/Transforms/InstCombine/x86-vperm2.ll
 delete mode 100644 test/Transforms/InstCombine/x86-vpermil.ll
 delete mode 100644 test/Transforms/InstCombine/x86-xop.ll
 create mode 100644 test/Transforms/InstNamer/basic.ll
 delete mode 100644 test/Transforms/InstSimplify/apint-or.ll
 create mode 100644 test/Transforms/InstSimplify/or.ll
 create mode 100644 test/Transforms/LoopIdiom/ARM/ctlz.ll
 create mode 100644 test/Transforms/LoopIdiom/X86/ctlz.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
 create mode 100644 test/Transforms/LoopVectorize/pr32859.ll
 create mode 100644 test/Transforms/NewGVN/pr32934.ll
 create mode 100644 test/Transforms/NewGVN/pr32952.ll
 create mode 100644 test/Transforms/NewGVN/verify-memoryphi.ll
 create mode 100644 test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
 create mode 100644 test/Transforms/SLPVectorizer/AArch64/remarks.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/arith-add.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/arith-mul.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/arith-sub.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/shift-ashr.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/shift-lshr.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/shift-shl.ll
 delete mode 100644 test/Transforms/SpeculativeExecution/spec-other.ll
 delete mode 100644 test/Transforms/SpeculativeExecution/spec-vector.ll

diff --git a/test/Transforms/CodeExtractor/ExtractedFnEntryCount.ll b/test/Transforms/CodeExtractor/ExtractedFnEntryCount.ll
index 509a4d7bfa18..8313cfac04ee 100644
--- a/test/Transforms/CodeExtractor/ExtractedFnEntryCount.ll
+++ b/test/Transforms/CodeExtractor/ExtractedFnEntryCount.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
 
 ; This test checks to make sure that the CodeExtractor
 ; properly sets the entry count for the function that is
diff --git a/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll b/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
index 425e96973596..8e362080dc48 100644
--- a/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
+++ b/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -skip-partial-inlining-cost-analysis -S | FileCheck %s
 
 ; This test checks to make sure that CodeExtractor updates
 ; the exit branch probabilities for multiple exit blocks.
diff --git a/test/Transforms/CodeExtractor/PartialInlineAnd.ll b/test/Transforms/CodeExtractor/PartialInlineAnd.ll
index e981a5ba5816..d32d834d2df3 100644
--- a/test/Transforms/CodeExtractor/PartialInlineAnd.ll
+++ b/test/Transforms/CodeExtractor/PartialInlineAnd.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -partial-inliner -S | FileCheck %s
 ; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
-; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
-; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
 
 ; Function Attrs: nounwind uwtable
 define i32 @bar(i32 %arg) local_unnamed_addr #0 {
diff --git a/test/Transforms/CodeExtractor/PartialInlineEntryUpdate.ll b/test/Transforms/CodeExtractor/PartialInlineEntryUpdate.ll
new file mode 100644
index 000000000000..3a7a9752e507
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineEntryUpdate.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s
+
+define i32 @Func(i1 %cond, i32* align 4 %align.val) !prof !1 {
+; CHECK: @Func({{.*}}) !prof [[REMAINCOUNT:![0-9]+]]
+entry:
+  br i1 %cond, label %if.then, label %return
+if.then:
+  ; Dummy store to have more than 0 uses
+  store i32 10, i32* %align.val, align 4
+  br label %return
+return:             ; preds = %entry
+  ret i32 0
+}
+
+define internal i32 @Caller1(i1 %cond, i32* align 2 %align.val) !prof !3 {
+entry:
+; CHECK-LABEL: @Caller1
+; CHECK: br
+; CHECK: call void @Func.1_
+; CHECK: br
+; CHECK: call void @Func.1_
+  %val = call i32 @Func(i1 %cond, i32* %align.val)
+  %val2 = call i32 @Func(i1 %cond, i32* %align.val)
+  ret i32 %val
+}
+
+define internal i32 @Caller2(i1 %cond, i32* align 2 %align.val) !prof !2 {
+entry:
+; CHECK-LABEL: @Caller2
+; CHECK: br
+; CHECK: call void @Func.1_
+  %val = call i32 @Func(i1 %cond, i32* %align.val)
+  ret i32 %val
+}
+
+; CHECK: [[REMAINCOUNT]] = !{!"function_entry_count", i64 150}
+!1 = !{!"function_entry_count", i64 200}
+!2 = !{!"function_entry_count", i64 10}
+!3 = !{!"function_entry_count", i64 20}
+
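; Note (commentary, not part of the diff): the REMAINCOUNT check above is
; simple bookkeeping. @Func enters with a !prof entry count of 200, and each
; partially inlined call site subtracts its caller's entry count, so two
; calls from @Caller1 (count 20) and one call from @Caller2 (count 10) leave
;
;   200 - 20 - 20 - 10 = 150
;
; on the remaining out-of-line copy of @Func, which is exactly what the
; CHECK line verifies.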
diff --git a/test/Transforms/CodeExtractor/PartialInlineHighCost.ll b/test/Transforms/CodeExtractor/PartialInlineHighCost.ll
new file mode 100644
index 000000000000..e43a94dc6c37
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineHighCost.ll
@@ -0,0 +1,107 @@
+; The outlined region has high frequency and the outlining
+; call sequence is expensive (input, output, multiple exit etc)
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=NOCOST %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -max-num-inline-blocks=2 -S | FileCheck --check-prefix=NOCOST %s
+
+
+; Function Attrs: nounwind
+define i32 @bar_hot_outline_region(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb16, !prof !1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @foo() #0
+  %tmp3 = tail call i32 (...) @foo() #0
+  %tmp4 = tail call i32 (...) @foo() #0
+  %tmp5 = tail call i32 (...) @foo() #0
+  %tmp6 = tail call i32 (...) @foo() #0
+  %tmp7 = tail call i32 (...) @foo() #0
+  %tmp8 = add nsw i32 %arg, 1
+  %tmp9 = tail call i32 @goo(i32 %tmp8) #0
+  %tmp10 = tail call i32 (...) @foo() #0
+  %tmp11 = icmp eq i32 %tmp10, 0
+  br i1 %tmp11, label %bb12, label %bb16
+
+bb12:                                             ; preds = %bb1
+  %tmp13 = tail call i32 (...) @foo() #0
+  %tmp14 = icmp eq i32 %tmp13, 0
+  %tmp15 = select i1 %tmp14, i32 0, i32 3
+  br label %bb16
+
+bb16:                                             ; preds = %bb12, %bb1, %bb
+  %tmp17 = phi i32 [ 2, %bb1 ], [ %tmp15, %bb12 ], [ 0, %bb ]
+  ret i32 %tmp17
+}
+
+define i32 @bar_cold_outline_region(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb16, !prof !2
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @foo() #0
+  %tmp3 = tail call i32 (...) @foo() #0
+  %tmp4 = tail call i32 (...) @foo() #0
+  %tmp5 = tail call i32 (...) @foo() #0
+  %tmp6 = tail call i32 (...) @foo() #0
+  %tmp7 = tail call i32 (...) @foo() #0
+  %tmp8 = add nsw i32 %arg, 1
+  %tmp9 = tail call i32 @goo(i32 %tmp8) #0
+  %tmp10 = tail call i32 (...) @foo() #0
+  %tmp11 = icmp eq i32 %tmp10, 0
+  br i1 %tmp11, label %bb12, label %bb16
+
+bb12:                                             ; preds = %bb1
+  %tmp13 = tail call i32 (...) @foo() #0
+  %tmp14 = icmp eq i32 %tmp13, 0
+  %tmp15 = select i1 %tmp14, i32 0, i32 3
+  br label %bb16
+
+bb16:                                             ; preds = %bb12, %bb1, %bb
+  %tmp17 = phi i32 [ 2, %bb1 ], [ %tmp15, %bb12 ], [ 0, %bb ]
+  ret i32 %tmp17
+}
+
+; Function Attrs: nounwind
+declare i32 @foo(...) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+declare i32 @goo(i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK-NOT: br i1
+; CHECK-NOT: call{{.*}}bar_hot_outline_region.
+; NOCOST-LABEL: @dummy_caller
+; NOCOST: br i1
+; NOCOST: call{{.*}}bar_hot_outline_region.
+
+  %tmp = tail call i32 @bar_hot_outline_region(i32 %arg)
+  ret i32 %tmp
+}
+
+define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller2
+; CHECK: br i1
+; CHECK: call{{.*}}bar_cold_outline_region.
+; NOCOST-LABEL: @dummy_caller2
+; NOCOST: br i1
+; NOCOST: call{{.*}}bar_cold_outline_region.
+
+  %tmp = tail call i32 @bar_cold_outline_region(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 301898)"}
+!1 = !{!"branch_weights", i32 2000, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 100}
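; Note (commentary, not part of the diff): the profile metadata is what
; drives the two outcomes checked above. !1 weights the branch into %bb1 at
; 2000:1, so the outlined region of @bar_hot_outline_region is hot and the
; default cost analysis declines to outline it (hence the CHECK-NOT lines in
; @dummy_caller), while !2 flips the weights to 1:100, making the region
; cold enough that even the cost-aware run outlines
; @bar_cold_outline_region. The NOCOST prefix shows both get outlined once
; -skip-partial-inlining-cost-analysis disables the profitability check.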
This can ; cause certain instructions that depend on the attributes to not diff --git a/test/Transforms/CodeGenPrepare/section-samplepgo.ll b/test/Transforms/CodeGenPrepare/section-samplepgo.ll new file mode 100644 index 000000000000..93d2a5f2542c --- /dev/null +++ b/test/Transforms/CodeGenPrepare/section-samplepgo.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -codegenprepare -S | FileCheck %s + +target triple = "x86_64-pc-linux-gnu" + +; This tests that hot/cold functions get correct section prefix assigned + +; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; The entry is hot +define void @hot_func() !prof !15 { + ret void +} + +; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]] +; The sum of 2 callsites are hot +define void @hot_call_func() !prof !16 { + call void @hot_func(), !prof !17 + call void @hot_func(), !prof !17 + ret void +} + +; CHECK-NOT: normal_func{{.*}}!section_prefix +; The sum of all callsites are neither hot or cold +define void @normal_func() !prof !16 { + call void @hot_func(), !prof !17 + call void @hot_func(), !prof !18 + call void @hot_func(), !prof !18 + ret void +} + +; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] +; The entry and the callsite are both cold +define void @cold_func() !prof !16 { + call void @hot_func(), !prof !18 + ret void +} + +; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"} +; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"branch_weights", i32 80} +!18 = !{!"branch_weights", i32 1} diff --git a/test/Transforms/CodeGenPrepare/section.ll b/test/Transforms/CodeGenPrepare/section.ll index 2c96612e1baf..4f3144e7fc73 100644 --- a/test/Transforms/CodeGenPrepare/section.ll +++ b/test/Transforms/CodeGenPrepare/section.ll @@ -10,32 +10,32 @@ define void @hot_func() !prof !15 { ret void } -; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]] -; The sum of 2 callsites are hot -define void @hot_call_func() !prof !16 { +; For instrumentation based PGO, we should only look at entry counts, +; not call site VP metadata (which can exist on value profiled memcpy, +; or possibly left behind after static analysis based devirtualization). 
+; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
+define void @cold_func1() !prof !16 {
   call void @hot_func(), !prof !17
   call void @hot_func(), !prof !17
   ret void
 }
 
-; CHECK-NOT: normal_func{{.*}}!section_prefix
-; The sum of all callsites are neither hot or cold
-define void @normal_func() !prof !16 {
+; CHECK: cold_func2{{.*}}!section_prefix
+define void @cold_func2() !prof !16 {
   call void @hot_func(), !prof !17
   call void @hot_func(), !prof !18
   call void @hot_func(), !prof !18
   ret void
 }
 
-; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; The entry and the callsite are both cold
-define void @cold_func() !prof !16 {
+; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
+define void @cold_func3() !prof !16 {
   call void @hot_func(), !prof !18
   ret void
 }
 
 ; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".cold"}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/test/Transforms/ConstProp/calls-math-finite.ll b/test/Transforms/ConstProp/calls-math-finite.ll
new file mode 100644
index 000000000000..00041f3e4a4b
--- /dev/null
+++ b/test/Transforms/ConstProp/calls-math-finite.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+; Test to verify constant folding can occur when math
+; routines are mapped to the ___finite versions
+; of functions due to __FINITE_MATH_ONLY__ being
+; enabled on headers. All calls should constant
+; fold away in this test.
+
+declare double @__acos_finite(double) #0
+declare float @__acosf_finite(float) #0
+declare double @__asin_finite(double) #0
+declare float @__asinf_finite(float) #0
+declare double @__atan2_finite(double, double) #0
+declare float @__atan2f_finite(float, float) #0
+declare double @__cosh_finite(double) #0
+declare float @__coshf_finite(float) #0
+declare double @__exp2_finite(double) #0
+declare float @__exp2f_finite(float) #0
+declare double @__exp_finite(double) #0
+declare float @__expf_finite(float) #0
+declare double @__log10_finite(double) #0
+declare float @__log10f_finite(float) #0
+declare double @__log_finite(double) #0
+declare float @__logf_finite(float) #0
+declare double @__pow_finite(double, double) #0
+declare float @__powf_finite(float, float) #0
+declare double @__sinh_finite(double) #0
+declare float @__sinhf_finite(float) #0
+
+attributes #0 = { nounwind readnone }
+
+define void @T() {
+; CHECK-LABEL: @T(
+
+; CHECK-NOT: call
+; CHECK: ret
+
+  %slot = alloca double
+  %slotf = alloca float
+
+  %ACOS = call fast double @__acos_finite(double 1.000000e+00)
+  store double %ACOS, double* %slot
+  %ASIN = call fast double @__asin_finite(double 1.000000e+00)
+  store double %ASIN, double* %slot
+  %ATAN2 = call fast double @__atan2_finite(double 3.000000e+00, double 4.000000e+00)
+  store double %ATAN2, double* %slot
+  %COSH = call fast double @__cosh_finite(double 3.000000e+00)
+  store double %COSH, double* %slot
+  %EXP = call fast double @__exp_finite(double 3.000000e+00)
+  store double %EXP, double* %slot
+  %EXP2 = call fast double @__exp2_finite(double 3.000000e+00)
+  store double %EXP2, double* %slot
+  %LOG = call fast double @__log_finite(double 3.000000e+00)
+  store double %LOG, double* %slot
+  %LOG10 = call fast double @__log10_finite(double 3.000000e+00)
+  store double %LOG10, double* %slot
+  %POW = call fast double @__pow_finite(double 1.000000e+00, double 4.000000e+00)
+  store double %POW, double* %slot
+  %SINH = call fast double @__sinh_finite(double 3.000000e+00)
+  store double %SINH, double* %slot
+
+  %ACOSF = call fast float @__acosf_finite(float 1.000000e+00)
+  store float %ACOSF, float* %slotf
+  %ASINF = call fast float @__asinf_finite(float 1.000000e+00)
+  store float %ASINF, float* %slotf
+  %ATAN2F = call fast float @__atan2f_finite(float 3.000000e+00, float 4.000000e+00)
+  store float %ATAN2F, float* %slotf
+  %COSHF = call fast float @__coshf_finite(float 3.000000e+00)
+  store float %COSHF, float* %slotf
+  %EXPF = call fast float @__expf_finite(float 3.000000e+00)
+  store float %EXPF, float* %slotf
+  %EXP2F = call fast float @__exp2f_finite(float 3.000000e+00)
+  store float %EXP2F, float* %slotf
+  %LOGF = call fast float @__logf_finite(float 3.000000e+00)
+  store float %LOGF, float* %slotf
+  %LOG10F = call fast float @__log10f_finite(float 3.000000e+00)
+  store float %LOG10F, float* %slotf
+  %POWF = call fast float @__powf_finite(float 3.000000e+00, float 4.000000e+00)
+  store float %POWF, float* %slotf
+  %SINHF = call fast float @__sinhf_finite(float 3.000000e+00)
+  store float %SINHF, float* %slotf
+  ret void
+}
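; Note (commentary, not part of the diff): the __*_finite symbols come from
; glibc's finite-math headers (bits/math-finite.h), which redirect calls
; such as exp() to __exp_finite() when __FINITE_MATH_ONLY__ is enabled.
; Because the finite variants agree with the standard functions on finite
; arguments, a call like
;
;   %EXP = call fast double @__exp_finite(double 3.000000e+00)
;
; can be folded by -constprop to the constant exp(3.0), about
; 2.0085536923e+01, which is why the test expects every call to disappear.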
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 1175ea522175..161637cc92b8 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -184,212 +184,6 @@ define double @T() {
   ret double %d
 }
 
-define i1 @test_sse_cvts_exact() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvts_exact(
-; CHECK-NOT: call
-; CHECK: ret i1 true
-entry:
-  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> ) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> ) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> ) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> ) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %cmp02 = icmp eq i32 %sum02, 10
-  %cmp13 = icmp eq i64 %sum13, 10
-  %b = and i1 %cmp02, %cmp13
-  ret i1 %b
-}
-
-; Inexact values should not fold as they are dependent on rounding mode
-define i1 @test_sse_cvts_inexact() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvts_inexact(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> ) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> ) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> ) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> ) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %cmp02 = icmp eq i32 %sum02, 4
-  %cmp13 = icmp eq i64 %sum13, 4
-  %b = and i1 %cmp02, %cmp13
-  ret i1 %b
-}
-
-; FLT_MAX/DBL_MAX should not fold
-define i1 @test_sse_cvts_max() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvts_max(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %fm = bitcast <4 x i32> to <4 x float>
-  %dm = bitcast <2 x i64> to <2 x double>
-  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %sum02.sext = sext i32 %sum02 to i64
-  %b = icmp eq i64 %sum02.sext, %sum13
-  ret i1 %b
-}
-
-; INF should not fold
-define i1 @test_sse_cvts_inf() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvts_inf(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %fm = bitcast <4 x i32> to <4 x float>
-  %dm = bitcast <2 x i64> to <2 x double>
-  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %sum02.sext = sext i32 %sum02 to i64
-  %b = icmp eq i64 %sum02.sext, %sum13
-  ret i1 %b
-}
-
-; NAN should not fold
-define i1 @test_sse_cvts_nan() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvts_nan(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %fm = bitcast <4 x i32> to <4 x float>
-  %dm = bitcast <2 x i64> to <2 x double>
-  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %sum02.sext = sext i32 %sum02 to i64
-  %b = icmp eq i64 %sum02.sext, %sum13
-  ret i1 %b
-}
-
-define i1 @test_sse_cvtts_exact() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvtts_exact(
-; CHECK-NOT: call
-; CHECK: ret i1 true
-entry:
-  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> ) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> ) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> ) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> ) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %cmp02 = icmp eq i32 %sum02, 10
-  %cmp13 = icmp eq i64 %sum13, 10
-  %b = and i1 %cmp02, %cmp13
-  ret i1 %b
-}
-
-define i1 @test_sse_cvtts_inexact() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvtts_inexact(
-; CHECK-NOT: call
-; CHECK: ret i1 true
-entry:
-  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> ) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> ) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> ) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> ) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %cmp02 = icmp eq i32 %sum02, 2
-  %cmp13 = icmp eq i64 %sum13, 2
-  %b = and i1 %cmp02, %cmp13
-  ret i1 %b
-}
-
-; FLT_MAX/DBL_MAX should not fold
-define i1 @test_sse_cvtts_max() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvtts_max(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %fm = bitcast <4 x i32> to <4 x float>
-  %dm = bitcast <2 x i64> to <2 x double>
-  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %sum02.sext = sext i32 %sum02 to i64
-  %b = icmp eq i64 %sum02.sext, %sum13
-  ret i1 %b
-}
-
-; INF should not fold
-define i1 @test_sse_cvtts_inf() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvtts_inf(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %fm = bitcast <4 x i32> to <4 x float>
-  %dm = bitcast <2 x i64> to <2 x double>
-  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %sum02.sext = sext i32 %sum02 to i64
-  %b = icmp eq i64 %sum02.sext, %sum13
-  ret i1 %b
-}
-
-; NAN should not fold
-define i1 @test_sse_cvtts_nan() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvtts_nan(
-; CHECK: call
-; CHECK: call
-; CHECK: call
-; CHECK: call
-entry:
-  %fm = bitcast <4 x i32> to <4 x float>
-  %dm = bitcast <2 x i64> to <2 x double>
-  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
-  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
-  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
-  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
-  %sum02 = add i32 %i0, %i2
-  %sum13 = add i64 %i1, %i3
-  %sum02.sext = sext i32 %sum02 to i64
-  %b = icmp eq i64 %sum02.sext, %sum13
-  ret i1 %b
-}
-
-declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
-declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
-declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
-declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
-declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
-
 define double @test_intrinsic_pow() nounwind uwtable ssp {
 entry:
 ; CHECK-LABEL: @test_intrinsic_pow(
diff --git a/test/Transforms/ConstProp/sse.ll b/test/Transforms/ConstProp/sse.ll
new file mode 100644
index 000000000000..cc37c96c1ff1
--- /dev/null
+++ b/test/Transforms/ConstProp/sse.ll
@@ -0,0 +1,208 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+; REQUIRES: x86
+
+define i1 @test_sse_cvts_exact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> ) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> ) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> ) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> ) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; Inexact values should not fold as they are dependent on rounding mode
+define i1 @test_sse_cvts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_inexact(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> ) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> ) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> ) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> ) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 4
+  %cmp13 = icmp eq i64 %sum13, 4
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_sse_cvts_max() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> to <4 x float>
+  %dm = bitcast <2 x i64> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_sse_cvts_inf() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> to <4 x float>
+  %dm = bitcast <2 x i64> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_sse_cvts_nan() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> to <4 x float>
+  %dm = bitcast <2 x i64> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+define i1 @test_sse_cvtts_exact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> ) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> ) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> ) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> ) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+define i1 @test_sse_cvtts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_inexact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> ) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> ) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> ) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> ) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 2
+  %cmp13 = icmp eq i64 %sum13, 2
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_sse_cvtts_max() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> to <4 x float>
+  %dm = bitcast <2 x i64> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_sse_cvtts_inf() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> to <4 x float>
+  %dm = bitcast <2 x i64> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_sse_cvtts_nan() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> to <4 x float>
+  %dm = bitcast <2 x i64> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
diff --git a/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll b/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll
new file mode 100644
index 000000000000..5da0e3c199db
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll
@@ -0,0 +1,218 @@
+; Check that we can handle edge splits leading into a landingpad
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: define internal fastcc void @f.resume(
+define void @f(i1 %cond) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = tail call i64 @llvm.coro.size.i64()
+  %alloc = call i8* @malloc(i64 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %cleanup
+  ]
+
+resume:
+  br i1 %cond, label %invoke1, label %invoke2
+
+invoke1:
+  invoke void @may_throw1()
+    to label %unreach unwind label %pad.with.phi
+invoke2:
+  invoke void @may_throw2()
+    to label %unreach unwind label %pad.with.phi
+
+; Verify that we cloned landing pad on every edge and inserted a reload of the spilled value
+
+; CHECK: pad.with.phi.from.invoke2:
+; CHECK:   %0 = landingpad { i8*, i32 }
+; CHECK:      catch i8* null
+; CHECK:   br label %pad.with.phi
+
+; CHECK: pad.with.phi.from.invoke1:
+; CHECK:   %1 = landingpad { i8*, i32 }
+; CHECK:      catch i8* null
+; CHECK:   br label %pad.with.phi
+
+; CHECK: pad.with.phi:
+; CHECK:   %val = phi i32 [ 0, %pad.with.phi.from.invoke1 ], [ 1, %pad.with.phi.from.invoke2 ]
+; CHECK:   %lp = phi { i8*, i32 } [ %0, %pad.with.phi.from.invoke2 ], [ %1, %pad.with.phi.from.invoke1 ]
+; CHECK:   %exn = extractvalue { i8*, i32 } %lp, 0
+; CHECK:   call i8* @__cxa_begin_catch(i8* %exn)
+; CHECK:   call void @use_val(i32 %val)
+; CHECK:   call void @__cxa_end_catch()
+; CHECK:   call void @free(i8* %vFrame)
+; CHECK:   ret void
+
+pad.with.phi:
+  %val = phi i32 [ 0, %invoke1 ], [ 1, %invoke2 ]
+  %lp = landingpad { i8*, i32 }
+          catch i8* null
+  %exn = extractvalue { i8*, i32 } %lp, 0
+  call i8* @__cxa_begin_catch(i8* %exn)
+  call void @use_val(i32 %val)
+  call void @__cxa_end_catch()
+  br label %cleanup
+
+cleanup:                                        ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+
+unreach:
+  unreachable
+}
+
+; CHECK-LABEL: define internal fastcc void @g.resume(
+define void @g(i1 %cond, i32 %x, i32 %y) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = tail call i64 @llvm.coro.size.i64()
+  %alloc = call i8* @malloc(i64 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %cleanup
+  ]
+
+resume:
+  br i1 %cond, label %invoke1, label %invoke2
+
+invoke1:
+  invoke void @may_throw1()
+    to label %unreach unwind label %pad.with.phi
+invoke2:
+  invoke void @may_throw2()
+    to label %unreach unwind label %pad.with.phi
+
+; Verify that we created cleanuppads on every edge and inserted a reload of the spilled value
+
+; CHECK: pad.with.phi.from.invoke2:
+; CHECK:   %0 = cleanuppad within none []
+; CHECK:   %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 6
+; CHECK:   %y.reload = load i32, i32* %y.reload.addr
+; CHECK:   cleanupret from %0 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi.from.invoke1:
+; CHECK:   %1 = cleanuppad within none []
+; CHECK:   %x.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 5
+; CHECK:   %x.reload = load i32, i32* %x.reload.addr
+; CHECK:   cleanupret from %1 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi:
+; CHECK:   %val = phi i32 [ %x.reload, %pad.with.phi.from.invoke1 ], [ %y.reload, %pad.with.phi.from.invoke2 ]
+; CHECK:   %tok = cleanuppad within none []
+; CHECK:   call void @use_val(i32 %val)
+; CHECK:   cleanupret from %tok unwind to caller
+
+pad.with.phi:
+  %val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
+  %tok = cleanuppad within none []
+  call void @use_val(i32 %val)
+  cleanupret from %tok unwind to caller
+
+cleanup:                                        ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+
+unreach:
+  unreachable
+}
+
+; CHECK-LABEL: define internal fastcc void @h.resume(
+define void @h(i1 %cond, i32 %x, i32 %y) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = tail call i64 @llvm.coro.size.i64()
+  %alloc = call i8* @malloc(i64 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %cleanup
+  ]
+
+resume:
+  br i1 %cond, label %invoke1, label %invoke2
+
+invoke1:
+  invoke void @may_throw1()
+    to label %coro.ret unwind label %pad.with.phi
+invoke2:
+  invoke void @may_throw2()
+    to label %coro.ret unwind label %pad.with.phi
+
+; Verify that we created cleanuppads on every edge and inserted a reload of the spilled value
+
+; CHECK: pad.with.phi.from.invoke2:
+; CHECK:   %0 = cleanuppad within none []
+; CHECK:   %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 6
+; CHECK:   %y.reload = load i32, i32* %y.reload.addr
+; CHECK:   cleanupret from %0 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi.from.invoke1:
+; CHECK:   %1 = cleanuppad within none []
+; CHECK:   %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 5
+; CHECK:   %x.reload = load i32, i32* %x.reload.addr
+; CHECK:   cleanupret from %1 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi:
+; CHECK:   %val = phi i32 [ %x.reload, %pad.with.phi.from.invoke1 ], [ %y.reload, %pad.with.phi.from.invoke2 ]
+; CHECK:   %switch = catchswitch within none [label %catch] unwind to caller
+pad.with.phi:
+  %val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
+  %switch = catchswitch within none [label %catch] unwind to caller
+
+catch:                                          ; preds = %catch.dispatch
+  %pad = catchpad within %switch [i8* null, i32 64, i8* null]
+  call void @use_val(i32 %val)
+  catchret from %pad to label %coro.ret
+
+cleanup:                                        ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
+declare noalias i8* @malloc(i64)
+declare i64 @llvm.coro.size.i64()
+declare i8* @llvm.coro.begin(token, i8* writeonly)
+
+; Function Attrs: nounwind
+declare token @llvm.coro.save(i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+
+; Function Attrs: argmemonly nounwind
+declare void @may_throw1()
+declare void @may_throw2()
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @use_val(i32)
+declare void @__cxa_end_catch()
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.end(i8*, i1)
+declare void @free(i8*)
+declare i8* @llvm.coro.free(token, i8* nocapture readonly)
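; Note (commentary, not part of the diff): the reason CoroSplit must split
; these unwind edges is the phi in the pad. After the coroutine is split,
; %val's incoming values (%x, %y) live in the coroutine frame rather than in
; SSA registers, so each predecessor needs its own cloned landingpad or
; cleanuppad (pad.with.phi.from.invoke1/2) in which to reload the spilled
; value from %FramePtr before control flow merges. The CHECK blocks above
; verify this for the landingpad, cleanuppad, and catchswitch flavors of EH
; pads.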
diff --git a/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll b/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll
index 0769575759ba..05dc79db95ad 100644
--- a/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll
+++ b/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll
@@ -5,8 +5,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
-
-define i1 @rb_intern() nounwind ssp {
+define i1 @rb_intern(i8 *%foo) nounwind ssp {
 ; CHECK-LABEL: @rb_intern(
 
 bb:
@@ -19,7 +18,7 @@ bb1:
   br i1 undef, label %bb3, label %bb15
 
 ; CHECK: bb1:
-; CHECK: [[TMP:%.*]] = phi i8* [ getelementptr (i8, i8* null, i64 undef), %bb10 ], [ null, %bb ]
+; CHECK: [[TMP:%.*]] = phi i8* [ %tmp14, %bb10 ], [ null, %bb ]
 
 ; CHECK: bb1.bb15_crit_edge:
 ; CHECK: %tmp17.pre = load i8, i8* [[TMP]], align 1
@@ -41,7 +40,7 @@ bb10:
   %tmp11 = load i8*, i8** %tmp, align 8
   %tmp12 = load i8, i8* %tmp11, align 1
   %tmp13 = zext i8 %tmp12 to i64
-  %tmp14 = getelementptr inbounds i8, i8* null, i64 undef
+  %tmp14 = getelementptr inbounds i8, i8* %foo, i64 undef
   store i8* %tmp14, i8** %tmp, align 8
   br label %bb1
 
diff --git a/test/Transforms/GVN/PRE/nonintegral.ll b/test/Transforms/GVN/PRE/nonintegral.ll
new file mode 100644
index 000000000000..75a756e8af8c
--- /dev/null
+++ b/test/Transforms/GVN/PRE/nonintegral.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @nipre(double addrspace(4)** noalias %p, i64 addrspace(4)** noalias %p2, i8 %jmp) {
+
+; CHECK-LABEL: @nipre(
+; CHECK:       [[PCAST:%.*]] = bitcast double addrspace(4)** [[P:%.*]] to i64 addrspace(4)**
+; CHECK:       a:
+; CHECK:       [[L1:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)** [[PCAST]]
+; CHECK:       [[TMP0:%.*]] = bitcast i64 addrspace(4)* [[L1]] to double addrspace(4)*
+; CHECK:       b:
+; CHECK:       [[L2:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)** [[PCAST]]
+; CHECK:       [[TMP1:%.*]] = bitcast i64 addrspace(4)* [[L2]] to double addrspace(4)*
+; CHECK:       c:
+; CHECK-NEXT:    [[L3_PRE:%.*]] = load double addrspace(4)*, double addrspace(4)** %p
+
+entry:
+  %pcast = bitcast double addrspace(4)** %p to i64 addrspace(4)**
+  switch i8 %jmp, label %c [ i8 0, label %a
+                             i8 1, label %b ]
+a:
+  %l1 = load i64 addrspace(4)*, i64 addrspace(4)** %pcast
+  store i64 addrspace(4)* %l1, i64 addrspace(4)** %p2
+  br label %tail
+b:
+  %l2 = load i64 addrspace(4)*, i64 addrspace(4)** %pcast
+  store i64 addrspace(4)* %l2, i64 addrspace(4)** %p2
+  br label %tail
+c:
+  br label %tail
+tail:
+  %l3 = load double addrspace(4)*, double addrspace(4)** %p
+  %l3cast = bitcast double addrspace(4)* %l3 to i64 addrspace(4)*
+  store i64 addrspace(4)* %l3cast, i64 addrspace(4)** %p2
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
index 3d77a364f96f..49e5d24296c0 100644
--- a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-darwin"
 
 ; CHECK-LABEL: @test(
 ; CHECK: if.end.i126:
-; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8, i8* null, i32 undef)
+; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, null
 define void @test() nounwind {
 entry:
   br label %while.cond
diff --git a/test/Transforms/InferFunctionAttrs/annotate.ll b/test/Transforms/InferFunctionAttrs/annotate.ll
index 64676bf310bd..cb4b5cdd1e8c 100644
--- a/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -22,12 +22,138 @@ declare i32 @__nvvm_reflect(i8*)
 
 ; Use an opaque pointer type for all the (possibly opaque) structs.
 %opaque = type opaque
 
+; CHECK: declare double @__acos_finite(double)
+declare double @__acos_finite(double)
+
+; CHECK: declare float @__acosf_finite(float)
+declare float @__acosf_finite(float)
+
+; CHECK: declare double @__acosh_finite(double)
+declare double @__acosh_finite(double)
+
+; CHECK: declare float @__acoshf_finite(float)
+declare float @__acoshf_finite(float)
+
+; CHECK: declare x86_fp80 @__acoshl_finite(x86_fp80)
+declare x86_fp80 @__acoshl_finite(x86_fp80)
+
+; CHECK: declare x86_fp80 @__acosl_finite(x86_fp80)
+declare x86_fp80 @__acosl_finite(x86_fp80)
+
+; CHECK: declare double @__asin_finite(double)
+declare double @__asin_finite(double)
+
+; CHECK: declare float @__asinf_finite(float)
+declare float @__asinf_finite(float)
+
+; CHECK: declare x86_fp80 @__asinl_finite(x86_fp80)
+declare x86_fp80 @__asinl_finite(x86_fp80)
+
+; CHECK: declare double @__atan2_finite(double, double)
+declare double @__atan2_finite(double, double)
+
+; CHECK: declare float @__atan2f_finite(float, float)
+declare float @__atan2f_finite(float, float)
+
+; CHECK: declare x86_fp80 @__atan2l_finite(x86_fp80, x86_fp80)
+declare x86_fp80 @__atan2l_finite(x86_fp80, x86_fp80)
+
+; CHECK: declare double @__atanh_finite(double)
+declare double @__atanh_finite(double)
+
+; CHECK: declare float @__atanhf_finite(float)
+declare float @__atanhf_finite(float)
+
+; CHECK: declare x86_fp80 @__atanhl_finite(x86_fp80)
+declare x86_fp80 @__atanhl_finite(x86_fp80)
+
+; CHECK: declare double @__cosh_finite(double)
+declare double @__cosh_finite(double)
+
+; CHECK: declare float @__coshf_finite(float)
+declare float @__coshf_finite(float)
+
+; CHECK: declare x86_fp80 @__coshl_finite(x86_fp80)
+declare x86_fp80 @__coshl_finite(x86_fp80)
+
 ; CHECK: declare double @__cospi(double)
 declare double @__cospi(double)
 
 ; CHECK: declare float @__cospif(float)
 declare float @__cospif(float)
 
+; CHECK: declare double @__exp10_finite(double)
+declare double @__exp10_finite(double)
+
+; CHECK: declare float @__exp10f_finite(float)
+declare float @__exp10f_finite(float)
+
+; CHECK: declare x86_fp80 @__exp10l_finite(x86_fp80)
+declare x86_fp80 @__exp10l_finite(x86_fp80)
+
+; CHECK: declare double @__exp2_finite(double)
+declare double @__exp2_finite(double)
+
+; CHECK: declare float @__exp2f_finite(float)
+declare float @__exp2f_finite(float)
+
+; CHECK: declare x86_fp80 @__exp2l_finite(x86_fp80)
+declare x86_fp80 @__exp2l_finite(x86_fp80)
+
+; CHECK: declare double @__exp_finite(double)
+declare double @__exp_finite(double)
+
+; CHECK: declare float @__expf_finite(float)
+declare float @__expf_finite(float)
+
+; CHECK: declare x86_fp80 @__expl_finite(x86_fp80)
+declare x86_fp80 @__expl_finite(x86_fp80)
+
+; CHECK: declare double @__log10_finite(double)
+declare double @__log10_finite(double)
+
+; CHECK: declare float @__log10f_finite(float)
+declare float @__log10f_finite(float)
+
+; CHECK: declare x86_fp80 @__log10l_finite(x86_fp80)
+declare x86_fp80 @__log10l_finite(x86_fp80)
+
+; CHECK: declare double @__log2_finite(double)
+declare double @__log2_finite(double)
+
+; CHECK: declare float @__log2f_finite(float)
+declare float @__log2f_finite(float)
+
+; CHECK: declare x86_fp80 @__log2l_finite(x86_fp80)
+declare x86_fp80 @__log2l_finite(x86_fp80)
+
+; CHECK: declare double @__log_finite(double)
+declare double @__log_finite(double)
+
+; CHECK: declare float @__logf_finite(float)
+declare float @__logf_finite(float)
+
+; CHECK: declare x86_fp80 @__logl_finite(x86_fp80)
+declare x86_fp80 @__logl_finite(x86_fp80)
+
+; CHECK: declare double @__pow_finite(double, double)
+declare double @__pow_finite(double, double)
+
+; CHECK: declare float @__powf_finite(float, float)
+declare float @__powf_finite(float, float)
+
+; CHECK: declare x86_fp80 @__powl_finite(x86_fp80, x86_fp80)
+declare x86_fp80 @__powl_finite(x86_fp80, x86_fp80)
+
+; CHECK: declare double @__sinh_finite(double)
+declare double @__sinh_finite(double)
+
+; CHECK: declare float @__sinhf_finite(float)
+declare float @__sinhf_finite(float)
+
+; CHECK: declare x86_fp80 @__sinhl_finite(x86_fp80)
+declare x86_fp80 @__sinhl_finite(x86_fp80)
+
 ; CHECK: declare double @__sinpi(double)
 declare double @__sinpi(double)
diff --git a/test/Transforms/InferFunctionAttrs/no-proto.ll b/test/Transforms/InferFunctionAttrs/no-proto.ll
index 25a4805c367f..3cab0ab4bf40 100644
--- a/test/Transforms/InferFunctionAttrs/no-proto.ll
+++ b/test/Transforms/InferFunctionAttrs/no-proto.ll
@@ -3,12 +3,138 @@
 
 ; Check that we don't modify libc functions with invalid prototypes.
 
+; CHECK: declare void @__acos_finite(...)
+declare void @__acos_finite(...)
+
+; CHECK: declare void @__acosf_finite(...)
+declare void @__acosf_finite(...)
+
+; CHECK: declare void @__acosh_finite(...)
+declare void @__acosh_finite(...)
+
+; CHECK: declare void @__acoshf_finite(...)
+declare void @__acoshf_finite(...)
+
+; CHECK: declare void @__acoshl_finite(...)
+declare void @__acoshl_finite(...)
+
+; CHECK: declare void @__acosl_finite(...)
+declare void @__acosl_finite(...)
+
+; CHECK: declare void @__asin_finite(...)
+declare void @__asin_finite(...)
+
+; CHECK: declare void @__asinf_finite(...)
+declare void @__asinf_finite(...)
+
+; CHECK: declare void @__asinl_finite(...)
+declare void @__asinl_finite(...)
+
+; CHECK: declare void @__atan2_finite(...)
+declare void @__atan2_finite(...)
+
+; CHECK: declare void @__atan2f_finite(...)
+declare void @__atan2f_finite(...)
+
+; CHECK: declare void @__atan2l_finite(...)
+declare void @__atan2l_finite(...)
+
+; CHECK: declare void @__atanh_finite(...)
+declare void @__atanh_finite(...)
+
+; CHECK: declare void @__atanhf_finite(...)
+declare void @__atanhf_finite(...)
+
+; CHECK: declare void @__atanhl_finite(...)
+declare void @__atanhl_finite(...)
+
+; CHECK: declare void @__cosh_finite(...)
+declare void @__cosh_finite(...)
+
+; CHECK: declare void @__coshf_finite(...)
+declare void @__coshf_finite(...)
+
+; CHECK: declare void @__coshl_finite(...)
+declare void @__coshl_finite(...)
+
 ; CHECK: declare void @__cospi(...)
 declare void @__cospi(...)
 
 ; CHECK: declare void @__cospif(...)
 declare void @__cospif(...)
 
+; CHECK: declare void @__exp10_finite(...)
+declare void @__exp10_finite(...)
+
+; CHECK: declare void @__exp10f_finite(...)
+declare void @__exp10f_finite(...)
+
+; CHECK: declare void @__exp10l_finite(...)
+declare void @__exp10l_finite(...)
+
+; CHECK: declare void @__exp2_finite(...)
+declare void @__exp2_finite(...)
+
+; CHECK: declare void @__exp2f_finite(...)
+declare void @__exp2f_finite(...)
+
+; CHECK: declare void @__exp2l_finite(...)
+declare void @__exp2l_finite(...)
+
+; CHECK: declare void @__exp_finite(...)
+declare void @__exp_finite(...)
+
+; CHECK: declare void @__expf_finite(...)
+declare void @__expf_finite(...)
+
+; CHECK: declare void @__expl_finite(...)
+declare void @__expl_finite(...)
+
+; CHECK: declare void @__log10_finite(...)
+declare void @__log10_finite(...)
+
+; CHECK: declare void @__log10f_finite(...)
+declare void @__log10f_finite(...)
+
+; CHECK: declare void @__log10l_finite(...)
+declare void @__log10l_finite(...)
+ +; CHECK: declare void @__log2_finite(...) +declare void @__log2_finite(...) + +; CHECK: declare void @__log2f_finite(...) +declare void @__log2f_finite(...) + +; CHECK: declare void @__log2l_finite(...) +declare void @__log2l_finite(...) + +; CHECK: declare void @__log_finite(...) +declare void @__log_finite(...) + +; CHECK: declare void @__logf_finite(...) +declare void @__logf_finite(...) + +; CHECK: declare void @__logl_finite(...) +declare void @__logl_finite(...) + +; CHECK: declare void @__pow_finite(...) +declare void @__pow_finite(...) + +; CHECK: declare void @__powf_finite(...) +declare void @__powf_finite(...) + +; CHECK: declare void @__powl_finite(...) +declare void @__powl_finite(...) + +; CHECK: declare void @__sinh_finite(...) +declare void @__sinh_finite(...) + +; CHECK: declare void @__sinhf_finite(...) +declare void @__sinhf_finite(...) + +; CHECK: declare void @__sinhl_finite(...) +declare void @__sinhl_finite(...) + ; CHECK: declare void @__sinpi(...) declare void @__sinpi(...) diff --git a/test/Transforms/Inline/inline-cold.ll b/test/Transforms/Inline/inline-cold.ll index 93d2569d87ad..e0e679ad4036 100644 --- a/test/Transforms/Inline/inline-cold.ll +++ b/test/Transforms/Inline/inline-cold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -S -inlinecold-threshold=75 | FileCheck %s +; RUN: opt < %s -inline -S -inlinecold-threshold=25 | FileCheck %s ; Test that functions with attribute Cold are not inlined while the ; same function without attribute Cold will be inlined. @@ -64,23 +64,7 @@ entry: %x3 = add i32 %x2, %a3 %a4 = load volatile i32, i32* @a %x4 = add i32 %x3, %a4 - %a5 = load volatile i32, i32* @a - %x5 = add i32 %x4, %a5 - %a6 = load volatile i32, i32* @a - %x6 = add i32 %x5, %a6 - %a7 = load volatile i32, i32* @a - %x7 = add i32 %x6, %a6 - %a8 = load volatile i32, i32* @a - %x8 = add i32 %x7, %a8 - %a9 = load volatile i32, i32* @a - %x9 = add i32 %x8, %a9 - %a10 = load volatile i32, i32* @a - %x10 = add i32 %x9, %a10 - %a11 = load volatile i32, i32* @a - %x11 = add i32 %x10, %a11 - %a12 = load volatile i32, i32* @a - %x12 = add i32 %x11, %a12 - %add = add i32 %x12, %a + %add = add i32 %x4, %a ret i32 %add } diff --git a/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll b/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll index 1f2b143c97ee..b8d41abe1c35 100644 --- a/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll +++ b/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll @@ -6,7 +6,7 @@ target datalayout = "e-p3:32:32-p4:64:64-n32" @lds = internal addrspace(3) global [64 x i64] zeroinitializer ; CHECK-LABEL: @constexpr_addrspacecast_ptr_size_change( -; CHECK: load i64, i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*), i64 undef) +; CHECK: load i64, i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*) ; CHECK-NEXT: br define void @constexpr_addrspacecast_ptr_size_change() #0 { %tmp0 = call i32 @foo(i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*)) #1 diff --git a/test/Transforms/Inline/partial-inline-act.ll b/test/Transforms/Inline/partial-inline-act.ll index 916436260bd6..27e719153875 100644 --- a/test/Transforms/Inline/partial-inline-act.ll +++ 
b/test/Transforms/Inline/partial-inline-act.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -partial-inliner -disable-output
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -disable-output
; This testcase tests the assumption cache
define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) {
diff --git a/test/Transforms/Inline/prof-update.ll b/test/Transforms/Inline/prof-update.ll
index 3fefa1c56cea..4a4471e8e17a 100644
--- a/test/Transforms/Inline/prof-update.ll
+++ b/test/Transforms/Inline/prof-update.ll
@@ -6,21 +6,21 @@ declare void @ext1();
@func = global void ()* null
; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
-define void @callee(i32 %n) !prof !1 {
+define void @callee(i32 %n) !prof !15 {
  %cond = icmp sle i32 %n, 10
  br i1 %cond, label %cond_true, label %cond_false
cond_true:
; ext1 is optimized away, thus not updated.
; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
-  call void @ext1(), !prof !2
+  call void @ext1(), !prof !16
  ret void
cond_false:
; ext is cloned and updated.
; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
-  call void @ext(), !prof !2
+  call void @ext(), !prof !16
  %f = load void ()*, void ()** @func
; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]
-  call void %f(), !prof !4
+  call void %f(), !prof !18
  ret void
}
@@ -28,16 +28,29 @@ cond_false:
define void @caller() {
; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]
-  call void @callee(i32 15), !prof !3
+  call void @callee(i32 15), !prof !17
  ret void
}
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 2000}
-!1 = !{!"function_entry_count", i64 1000}
-!2 = !{!"branch_weights", i64 2000}
-!3 = !{!"branch_weights", i64 400}
-!4 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"branch_weights", i64 2000}
+!17 = !{!"branch_weights", i64 400}
+!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
attributes #0 = { alwaysinline }
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
deleted file mode 100644
index 39408a2d394c..000000000000
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-
-define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
-}
-
-define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @constantMul() nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-}
-
-define <4 x i32> @constantMulS() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-}
-
-define <4 x i32> @constantMulU() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-}
-
-define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
-  %b = add <4 x i32> zeroinitializer, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  %b = add <4 x i32> %x, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: ret <4 x i32> %b
-}
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-; ARM64 variants - <rdar://problem/12349617>
-
-define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
-}
-
-define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @constantMulARM64() nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-}
-
-define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-}
-
-define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-}
-
-define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
-  %b = add <4 x i32> zeroinitializer, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  %b = add <4 x i32> %x, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: ret <4 x i32> %b
-}
-
-declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-; CHECK: attributes #0 = { nounwind readnone ssp }
-; CHECK: attributes #1 = { nounwind readnone }
-; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll
new file mode 100644
index 000000000000..04fb7d91193a
--- /dev/null
+++ b/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll
@@ -0,0 +1,71 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 neon intrinsic variants - <rdar://problem/12349617>
+; REQUIRES: aarch64
+
+define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMulARM64() nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+}
+
+define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+  %b = add <4 x i32> zeroinitializer, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  %b = add <4 x i32> %x, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind readnone ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/AArch64/lit.local.cfg b/test/Transforms/InstCombine/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..7184443994b6
--- /dev/null
+++ b/test/Transforms/InstCombine/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
b/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll new file mode 100644 index 000000000000..1901997c5521 --- /dev/null +++ b/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -0,0 +1,1540 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; -------------------------------------------------------------------- +; llvm.amdgcn.rcp +; -------------------------------------------------------------------- + +declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone +declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone + +; CHECK-LABEL: @test_constant_fold_rcp_f32_undef +; CHECK-NEXT: ret float undef +define float @test_constant_fold_rcp_f32_undef() nounwind { + %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_1 +; CHECK-NEXT: ret float 1.000000e+00 +define float @test_constant_fold_rcp_f32_1() nounwind { + %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_1 +; CHECK-NEXT: ret double 1.000000e+00 +define double @test_constant_fold_rcp_f64_1() nounwind { + %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_half +; CHECK-NEXT: ret float 2.000000e+00 +define float @test_constant_fold_rcp_f32_half() nounwind { + %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_half +; CHECK-NEXT: ret double 2.000000e+00 +define double @test_constant_fold_rcp_f64_half() nounwind { + %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_43 +; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) +define float @test_constant_fold_rcp_f32_43() nounwind { + %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_43 +; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) +define double @test_constant_fold_rcp_f64_43() nounwind { + %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone + ret double %val +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.rsq +; -------------------------------------------------------------------- + +declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone + +; CHECK-LABEL: @test_constant_fold_rsq_f32_undef +; CHECK-NEXT: ret float undef +define float @test_constant_fold_rsq_f32_undef() nounwind { + %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone + ret float %val +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.frexp.mant +; -------------------------------------------------------------------- + +declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone +declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone + + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef( +; CHECK-NEXT: ret float undef +define float @test_constant_fold_frexp_mant_f32_undef() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float undef) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef( +; CHECK-NEXT: ret double undef +define double @test_constant_fold_frexp_mant_f64_undef() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double undef) + ret double %val +} + 
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0( +; CHECK-NEXT: ret float 0.000000e+00 +define float @test_constant_fold_frexp_mant_f32_0() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0( +; CHECK-NEXT: ret double 0.000000e+00 +define double @test_constant_fold_frexp_mant_f64_0() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0) + ret double %val +} + + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0( +; CHECK-NEXT: ret float -0.000000e+00 +define float @test_constant_fold_frexp_mant_f32_n0() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0( +; CHECK-NEXT: ret double -0.000000e+00 +define double @test_constant_fold_frexp_mant_f64_n0() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0) + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1( +; CHECK-NEXT: ret float 5.000000e-01 +define float @test_constant_fold_frexp_mant_f32_1() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1( +; CHECK-NEXT: ret double 5.000000e-01 +define double @test_constant_fold_frexp_mant_f64_1() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0) + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1( +; CHECK-NEXT: ret float -5.000000e-01 +define float @test_constant_fold_frexp_mant_f32_n1() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1( +; CHECK-NEXT: ret double -5.000000e-01 +define double @test_constant_fold_frexp_mant_f64_n1() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0) + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan( +; CHECK-NEXT: ret float 0x7FF8000000000000 +define float @test_constant_fold_frexp_mant_f32_nan() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan( +; CHECK-NEXT: ret double 0x7FF8000000000000 +define double @test_constant_fold_frexp_mant_f64_nan() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000) + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf( +; CHECK-NEXT: ret float 0x7FF0000000000000 +define float @test_constant_fold_frexp_mant_f32_inf() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf( +; CHECK-NEXT: ret double 0x7FF0000000000000 +define double @test_constant_fold_frexp_mant_f64_inf() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000) + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf( +; CHECK-NEXT: ret float 0xFFF0000000000000 +define float @test_constant_fold_frexp_mant_f32_ninf() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf( +; CHECK-NEXT: ret double 0xFFF0000000000000 +define double @test_constant_fold_frexp_mant_f64_ninf() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000) + ret double %val +} + +; CHECK-LABEL: 
@test_constant_fold_frexp_mant_f32_max_num( +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +define float @test_constant_fold_frexp_mant_f32_max_num() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num( +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF +define double @test_constant_fold_frexp_mant_f64_max_num() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF) + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num( +; CHECK-NEXT: ret float 5.000000e-01 +define float @test_constant_fold_frexp_mant_f32_min_num() nounwind { + %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000) + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num( +; CHECK-NEXT: ret double 5.000000e-01 +define double @test_constant_fold_frexp_mant_f64_min_num() nounwind { + %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324) + ret double %val +} + + +; -------------------------------------------------------------------- +; llvm.amdgcn.frexp.exp +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone +declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef( +; CHECK-NEXT: ret i32 undef +define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef( +; CHECK-NEXT: ret i32 undef +define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f32_0() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f64_0() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024( +; CHECK-NEXT: ret i32 11 +define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024( +; CHECK-NEXT: ret i32 11 +define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024( +; CHECK-NEXT: ret i32 11 +define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024( +; CHECK-NEXT: ret i32 11 +define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind { + %val = call i32 
@llvm.amdgcn.frexp.exp.f64(double -1024.0) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024( +; CHECK-NEXT: ret i32 -9 +define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024( +; CHECK-NEXT: ret i32 -9 +define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf( +; CHECK-NEXT: ret i32 0 +define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num( +; CHECK-NEXT: ret i32 128 +define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num( +; CHECK-NEXT: ret i32 1024 +define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num( +; CHECK-NEXT: ret i32 -148 +define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000) + ret i32 %val +} + +; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num( +; CHECK-NEXT: ret i32 -1073 +define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind { + %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324) + ret i32 %val +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.class +; -------------------------------------------------------------------- + +declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone +declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone + +; CHECK-LABEL: @test_class_undef_mask_f32( +; CHECK: ret i1 false +define i1 @test_class_undef_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef) + ret i1 %val +} + +; CHECK-LABEL: @test_class_over_max_mask_f32( +; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1) +define i1 
@test_class_over_max_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025) + ret i1 %val +} + +; CHECK-LABEL: @test_class_no_mask_f32( +; CHECK: ret i1 false +define i1 @test_class_no_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0) + ret i1 %val +} + +; CHECK-LABEL: @test_class_full_mask_f32( +; CHECK: ret i1 true +define i1 @test_class_full_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_no_mask_f32( +; CHECK: ret i1 false +define i1 @test_class_undef_no_mask_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_full_mask_f32( +; CHECK: ret i1 true +define i1 @test_class_undef_full_mask_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_val_f32( +; CHECK: ret i1 undef +define i1 @test_class_undef_val_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_undef_f32( +; CHECK: ret i1 undef +define i1 @test_class_undef_undef_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef) + ret i1 %val +} + +; CHECK-LABEL: @test_class_var_mask_f32( +; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask) +define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask) + ret i1 %val +} + +; CHECK-LABEL: @test_class_isnan_f32( +; CHECK: %val = fcmp uno float %x, 0.000000e+00 +define i1 @test_class_isnan_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_snan_test_snan_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_snan_test_snan_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_qnan_test_qnan_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_qnan_test_snan_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_ninf_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_pinf_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_qnan_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_snan_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64( +; CHECK: ret i1 
true +define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_nzero_test_nzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_pzero_test_nzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_pzero_test_pzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_nzero_test_pzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_pinf_test_pinf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_ninf_test_pinf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_qnan_test_pinf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512) + ret i1 %val +} + +; CHECK-LABEL: 
@test_constant_class_snan_test_pinf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
+  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
+  ret i1 %val
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cos
+; --------------------------------------------------------------------
+declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; CHECK-LABEL: @cos_fneg_f32(
+; CHECK: %cos = call float @llvm.amdgcn.cos.f32(float %x)
+; CHECK-NEXT: ret float %cos
+define float @cos_fneg_f32(float %x) {
+  %x.fneg = fsub float -0.0, %x
+  %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
+  ret float %cos
+}
+
+; CHECK-LABEL: @cos_fabs_f32(
+; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
+; CHECK-NEXT: ret float %cos
+define float @cos_fabs_f32(float %x) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
+  ret float %cos
+}
+
+; CHECK-LABEL: @cos_fabs_fneg_f32(
+; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
+; CHECK-NEXT: ret float %cos
+define float @cos_fabs_fneg_f32(float %x) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %x.fabs.fneg = fsub float -0.0, %x.fabs
+  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
+  ret float %cos
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pkrtz
+; --------------------------------------------------------------------
+
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+
+; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %y)
+define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.000000e+00)
+define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
+define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
+define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pkrtz(
+; CHECK: ret <2 x half> undef
+define <2 x half> @undef_cvt_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
+; CHECK: ret <2 x half> zeroinitializer
+define <2 x half> @constant_splat0_cvt_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_cvt_pkrtz(
+; CHECK: ret <2 x half> <half 0xH4000, half 0xH4400>
+define <2 x half> @constant_cvt_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
+  ret <2 x half> %cvt
+}
+
+; Test constant values where rtz changes result
+; CHECK-LABEL: @constant_rtz_pkrtz(
+; CHECK: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
+define <2 x half> @constant_rtz_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
+  ret <2 x half> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.ubfe
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
+
+; CHECK-LABEL: @ubfe_var_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
+define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 5, i32 %width)
+define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 5)
+define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_width_0(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_31(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
+define i32 @ubfe_width_31(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_32(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_width_32(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_33(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 1)
+define i32 @ubfe_width_33(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_33(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 1, i32 %width)
+define i32 @ubfe_offset_33(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0(
+; CHECK-NEXT: %1 = sub i32 32, %width
+; CHECK-NEXT: %2 = shl i32 %src, %1
+; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: ret i32 %bfe
+define i32 @ubfe_offset_0(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_32(
+; CHECK-NEXT: %1 = sub i32 32, %width
+; CHECK-NEXT: %2 = shl i32 %src, %1
+; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: ret i32 %bfe
+define i32 @ubfe_offset_32(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_31(
+; CHECK-NEXT: %1 = sub i32 32, %width
+; CHECK-NEXT: %2 = shl i32 %src, %1
+; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: ret i32 %bfe
+define i32 @ubfe_offset_31(i32 %src, i32 %width) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_offset_0_width_0( +; CHECK-NEXT: ret i32 0 +define i32 @ubfe_offset_0_width_0(i32 %src) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_offset_0_width_3( +; CHECK-NEXT: and i32 %src, 7 +; CHECK-NEXT: ret +define i32 @ubfe_offset_0_width_3(i32 %src) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_offset_3_width_1( +; CHECK-NEXT: %1 = lshr i32 %src, 3 +; CHECK-NEXT: and i32 %1, 1 +; CHECK-NEXT: ret i32 +define i32 @ubfe_offset_3_width_1(i32 %src) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_offset_3_width_4( +; CHECK-NEXT: %1 = lshr i32 %src, 3 +; CHECK-NEXT: and i32 %1, 15 +; CHECK-NEXT: ret i32 +define i32 @ubfe_offset_3_width_4(i32 %src) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_0_0_0( +; CHECK-NEXT: ret i32 0 +define i32 @ubfe_0_0_0() { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_neg1_5_7( +; CHECK-NEXT: ret i32 127 +define i32 @ubfe_neg1_5_7() { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_undef_src_i32( +; CHECK-NEXT: ret i32 undef +define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_undef_offset_i32( +; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width) +define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_undef_width_i32( +; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef) +define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) { + %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef) + ret i32 %bfe +} + +; CHECK-LABEL: @ubfe_offset_33_width_4_i64( +; CHECK-NEXT: %1 = lshr i64 %src, 33 +; CHECK-NEXT: %bfe = and i64 %1, 15 +define i64 @ubfe_offset_33_width_4_i64(i64 %src) { + %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4) + ret i64 %bfe +} + +; CHECK-LABEL: @ubfe_offset_0_i64( +; CHECK-NEXT: %1 = sub i32 64, %width +; CHECK-NEXT: %2 = zext i32 %1 to i64 +; CHECK-NEXT: %3 = shl i64 %src, %2 +; CHECK-NEXT: %bfe = lshr i64 %3, %2 +; CHECK-NEXT: ret i64 %bfe +define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) { + %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width) + ret i64 %bfe +} + +; CHECK-LABEL: @ubfe_offset_32_width_32_i64( +; CHECK-NEXT: %bfe = lshr i64 %src, 32 +; CHECK-NEXT: ret i64 %bfe +define i64 @ubfe_offset_32_width_32_i64(i64 %src) { + %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32) + ret i64 %bfe +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.sbfe +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone +declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone + +; CHECK-LABEL: @sbfe_offset_31( +; CHECK-NEXT: %1 = sub i32 32, %width +; CHECK-NEXT: %2 = shl i32 %src, %1 +; CHECK-NEXT: %bfe = ashr i32 %2, %1 +; CHECK-NEXT: ret i32 %bfe +define i32 
@sbfe_offset_31(i32 %src, i32 %width) { + %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 32, i32 %width) + ret i32 %bfe +} + +; CHECK-LABEL: @sbfe_neg1_5_7( +; CHECK-NEXT: ret i32 -1 +define i32 @sbfe_neg1_5_7() { + %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7) + ret i32 %bfe +} + +; CHECK-LABEL: @sbfe_offset_32_width_32_i64( +; CHECK-NEXT: %bfe = ashr i64 %src, 32 +; CHECK-NEXT: ret i64 %bfe +define i64 @sbfe_offset_32_width_32_i64(i64 %src) { + %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32) + ret i64 %bfe +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.exp +; -------------------------------------------------------------------- + +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly + +; Make sure no crashing on invalid variable params +; CHECK-LABEL: @exp_invalid_inputs( +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) +define void @exp_invalid_inputs(i32 %tgt, i32 %en) { + call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + ret void +} + +; CHECK-LABEL: @exp_disabled_inputs_to_undef( +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false) + +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float undef, float undef, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float %y, float undef, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float %z, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float %w, i1 true, i1 false) + +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false) + +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false) +; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false) +define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) { + ; enable src0..src3 constants + call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + call void 
@llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+  ; enable src0..src3 variables
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
+
+  ; enable none
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
+
+  ; enable different source combinations
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
+
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.exp.compr
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
+
+; CHECK-LABEL: @exp_compr_invalid_inputs(
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> %zw, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+
+; CHECK-LABEL: @fmed3_f32(
+; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+define float @fmed3_f32(float %x, float %y, float %z) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
+; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_undef_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_undef_x_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float
undef, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32( +; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) { + %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_undef_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_undef_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_y_undef_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_y_undef_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan0_x_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_qnan0_x_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_qnan0_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_qnan0_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_y_qnan0_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_x_y_qnan0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan1_x_y_f32( +; CHECK: call float @llvm.minnum.f32(float %x, float %y) +define float @fmed3_qnan1_x_y_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y) + ret float %med3 +} + +; This can return any of the qnans. 
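+; (The fold below returns 0x7FF8002000000000, the middle of the three payloads,
+; but either of the other two qnan constants would be an equally valid result.)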
+; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32( +; CHECK: ret float 0x7FF8002000000000 +define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src0_0_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src0_0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src0_1_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src0_1_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src1_0_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src1_0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src1_1_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src1_1_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src2_0_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src2_0_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_constant_src2_1_f32( +; CHECK: ret float 5.000000e-01 +define float @fmed3_constant_src2_1_f32(float %x, float %y) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32( +; CHECK: ret float %x +define float @fmed3_x_qnan0_qnan1_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32( +; CHECK: ret float %x +define float @fmed3_qnan0_x_qnan1_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000) + ret float %med3 +} + +; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32( +; CHECK: ret float %x +define float @fmed3_qnan0_qnan1_x_f32(float %x) { + %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x) + ret float %med3 +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.icmp +; -------------------------------------------------------------------- + +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent + +; Make sure there's no crash for invalid input +; CHECK-LABEL: @invalid_nonconstant_icmp_code( +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) +define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) + ret i64 %result +} + +; CHECK-LABEL: @invalid_icmp_code( +; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31) +; CHECK: %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42) +define i64 @invalid_icmp_code(i32 %a, i32 %b) { + %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31) + %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42) + %or = or i64 %under, %over + ret i64 %or +} + +; CHECK-LABEL: 
@icmp_constant_inputs_false( +; CHECK: ret i64 0 +define i64 @icmp_constant_inputs_false() { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32) + ret i64 %result +} + +; CHECK-LABEL: @icmp_constant_inputs_true( +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5 +define i64 @icmp_constant_inputs_true() { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34) + ret i64 %result +} + +; CHECK-LABEL: @icmp_constant_to_rhs_slt( +; CHECK: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 9, i32 38) +define i64 @icmp_constant_to_rhs_slt(i32 %x) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40) + ret i64 %result +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32) +define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) { + %cmp = icmp eq i32 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33) +define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) { + %cmp = icmp ne i32 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 41) +define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { + %cmp = icmp sle i32 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34) +define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) { + %cmp = icmp ugt i64 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34) +define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) { + %cmp = icmp ugt i64 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32( +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 1) +define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) { + %cmp = fcmp oeq float %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32( +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14) +define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) { + %cmp = fcmp une float %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64( +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 4) +define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) { + %cmp = fcmp olt double %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32( +; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 
32) +define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) { + %cmp = icmp eq i32 %a, %b + %sext.cmp = sext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33) +define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) { + %cmp = icmp eq i32 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39) +define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) { + %cmp = icmp slt i32 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32( +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14) +define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) { + %cmp = fcmp oeq float %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32( +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 2) +define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) { + %cmp = fcmp ule float %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32( +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 13) +define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) { + %cmp = fcmp ogt float %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32) +define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) { + %cmp = icmp eq i32 %a, %b + %zext.cmp = zext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32( +; CHECK: %zext.cond = zext i1 %cond to i32 +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 0, i32 33) +define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) { + %zext.cond = zext i1 %cond to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32( +; CHECK: %zext.cond = zext i1 %cond to i32 +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32) +define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) { + %zext.cond = zext i1 %cond to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32( +; CHECK: %sext.cond = sext i1 %cond to i32 +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32) +define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) { + %sext.cond = sext i1 %cond to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32( +; CHECK: %sext.cond = sext i1 %cond to i32 +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 0, i32 33) +define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) { + 
%sext.cond = sext i1 %cond to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64( +; CHECK: %sext.cond = sext i1 %cond to i64 +; CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 0, i32 33) +define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) { + %sext.cond = sext i1 %cond to i64 + %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32) + ret i64 %mask +} + +; TODO: Should be able to fold to false +; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32( +; CHECK: %cmp = icmp eq i32 %a, %b +; CHECK: %sext.cmp = sext i1 %cmp to i32 +; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32) +define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) { + %cmp = icmp eq i32 %a, %b + %sext.cmp = sext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32( +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32) +define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) { + %cmp = icmp eq i32 %a, %b + %sext.cmp = sext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32( +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39) +define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) { + %cmp = icmp sge i32 %a, %b + %sext.cmp = sext i1 %cmp to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32) + ret i64 %mask +} + +; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32( +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 38) +define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { + %cmp = icmp sle i32 %a, %b + %not = xor i1 %cmp, true + %zext.cmp = zext i1 %not to i32 + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + ret i64 %mask +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.fcmp +; -------------------------------------------------------------------- + +declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent + +; Make sure there's no crash for invalid input +; CHECK-LABEL: @invalid_nonconstant_fcmp_code( +; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) +define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) + ret i64 %result +} + +; CHECK-LABEL: @invalid_fcmp_code( +; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1) +; CHECK: %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16) +define i64 @invalid_fcmp_code(float %a, float %b) { + %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1) + %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16) + %or = or i64 %under, %over + ret i64 %or +} + +; CHECK-LABEL: @fcmp_constant_inputs_false( +; CHECK: ret i64 0 +define i64 @fcmp_constant_inputs_false() { + %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1) + ret i64 %result +} + +; CHECK-LABEL: @fcmp_constant_inputs_true( +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5 +define i64 @fcmp_constant_inputs_true() { + %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4) + ret i64 %result +} + +; CHECK-LABEL: @fcmp_constant_to_rhs_olt( +; CHECK: %result = call i64 @llvm.amdgcn.fcmp.f32(float 
%x, float 4.000000e+00, i32 2)
+define i64 @fcmp_constant_to_rhs_olt(float %x) {
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
+  ret i64 %result
+}
+
+; CHECK: attributes #5 = { convergent }
diff --git a/test/Transforms/InstCombine/AMDGPU/lit.local.cfg b/test/Transforms/InstCombine/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/Transforms/InstCombine/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
new file mode 100644
index 000000000000..9efed367d19f
--- /dev/null
+++ b/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMul() nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulS() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+}
+
+define <4 x i32> @constantMulU() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -131071, i32 -131071, i32 -131071, i32 -131071>
+}
+
+define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+  %b = add <4 x i32> zeroinitializer, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  %b = add <4 x i32> %x, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
diff --git a/test/Transforms/InstCombine/ARM/constant-fold-hang.ll b/test/Transforms/InstCombine/ARM/constant-fold-hang.ll
new file mode 100644
index 000000000000..2ca6b86ccc2f
--- /dev/null
+++ b/test/Transforms/InstCombine/ARM/constant-fold-hang.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instcombine < %s
+
+; Function Attrs: nounwind readnone ssp
+define void @mulByZero(<4 x i16> %x) #0 {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) #2
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x i32>
@llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) #1 + +attributes #0 = { nounwind readnone ssp } +attributes #1 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/ARM/lit.local.cfg b/test/Transforms/InstCombine/ARM/lit.local.cfg new file mode 100644 index 000000000000..236e1d344166 --- /dev/null +++ b/test/Transforms/InstCombine/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/InstCombine/ARM/neon-intrinsics.ll b/test/Transforms/InstCombine/ARM/neon-intrinsics.ll new file mode 100644 index 000000000000..d22fa9c811dc --- /dev/null +++ b/test/Transforms/InstCombine/ARM/neon-intrinsics.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; The alignment arguments for NEON load/store intrinsics can be increased +; by instcombine. Check for this. + +; CHECK: vld4.v2i32.p0i8({{.*}}, i32 32) +; CHECK: vst4.p0i8.v2i32({{.*}}, i32 16) + +@x = common global [8 x i32] zeroinitializer, align 32 +@y = common global [8 x i32] zeroinitializer, align 16 + +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } + +define void @test() nounwind ssp { + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* bitcast ([8 x i32]* @x to i8*), i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3 + call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1) + ret void +} + +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind diff --git a/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll b/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll new file mode 100644 index 000000000000..10b4e4d62631 --- /dev/null +++ b/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll @@ -0,0 +1,131 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1 + +define <4 x i32> @test1(<4 x i32>* %h) #0 { +entry: + %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1 + %hv = bitcast <4 x i32>* %h1 to i8* + %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv) + +; CHECK-LABEL: @test1 +; CHECK: @llvm.ppc.altivec.lvx +; CHECK: ret <4 x i32> + + %v0 = load <4 x i32>, <4 x i32>* %h, align 8 + %a = add <4 x i32> %v0, %vl + ret <4 x i32> %a +} + +define <4 x i32> @test1a(<4 x i32>* align 16 %h) #0 { +entry: + %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1 + %hv = bitcast <4 x i32>* %h1 to i8* + %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv) + +; CHECK-LABEL: @test1a +; CHECK-NOT: @llvm.ppc.altivec.lvx +; CHECK: ret <4 x i32> + + %v0 = load <4 x i32>, <4 x i32>* %h, align 8 + %a = add <4 x i32> %v0, %vl + ret <4 x i32> %a +} + +declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0 + +define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 { +entry: + %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1 + %hv = bitcast <4 x i32>* %h1 to i8* + call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv) + + %v0 = load <4 x i32>, <4 x 
i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.altivec.stvx
+; CHECK: ret <4 x i32>
+}
+
+define <4 x i32> @test2a(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2a
+; CHECK-NOT: @llvm.ppc.altivec.stvx
+; CHECK: ret <4 x i32>
+}
+
+declare <4 x i32> @llvm.ppc.altivec.lvxl(i8*) #1
+
+define <4 x i32> @test1l(<4 x i32>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.altivec.lvxl
+; CHECK: ret <4 x i32>
+
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+}
+
+define <4 x i32> @test1la(<4 x i32>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.altivec.lvxl
+; CHECK: ret <4 x i32>
+
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+}
+
+declare void @llvm.ppc.altivec.stvxl(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2l(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.altivec.stvxl
+; CHECK: ret <4 x i32>
+}
+
+define <4 x i32> @test2la(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2la
+; CHECK-NOT: @llvm.ppc.altivec.stvxl
+; CHECK: ret <4 x i32>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll b/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll
new file mode 100644
index 000000000000..e9710df5670c
--- /dev/null
+++ b/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll
@@ -0,0 +1,165 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.ppc.qpx.qvlfs(i8*) #1
+
+define <4 x double> @test1(<4 x float>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1
+; CHECK: @llvm.ppc.qpx.qvlfs
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
+  %v0e = fpext <4 x float> %v0 to <4 x double>
+  %a = fadd <4 x double> %v0e, %vl
+  ret <4 x double> %a
+}
+
+define <4 x double> @test1a(<4 x float>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1a
+; CHECK-NOT: @llvm.ppc.qpx.qvlfs
+; CHECK-NOT: load <4 x double>
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
+  %v0e = fpext <4 x float> %v0 to <4 x double>
+  %a = fadd <4 x double> %v0e, %vl
+  ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfs(<4 x double>, i8*) #0
+
+define <4 x float> @test2(<4 x float>* %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
+  ret <4 x float> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.qpx.qvstfs
+; CHECK: ret <4 x float>
+}
+
+define <4 x float> @test2a(<4 x float>* align 16 %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+  %hv = bitcast <4 x float>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
+  ret <4 x float> %v0
+
+; CHECK-LABEL: @test2a
+; CHECK: fptrunc <4 x double> %d to <4 x float>
+; CHECK-NOT: @llvm.ppc.qpx.qvstfs
+; CHECK-NOT: store <4 x double>
+; CHECK: ret <4 x float>
+}
+
+declare <4 x double> @llvm.ppc.qpx.qvlfd(i8*) #1
+
+define <4 x double> @test1l(<4 x double>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
+  %a = fadd <4 x double> %v0, %vl
+  ret <4 x double> %a
+}
+
+define <4 x double> @test1ln(<4 x double>* align 16 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1ln
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
+  %a = fadd <4 x double> %v0, %vl
+  ret <4 x double> %a
+}
+
+define <4 x double> @test1la(<4 x double>* align 32 %h) #0 {
+entry:
+  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
+  %a = fadd <4 x double> %v0, %vl
+  ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfd(<4 x double>, i8*) #0
+
+define <4 x double> @test2l(<4 x double>* %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
+  ret <4 x double> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2ln(<4 x double>* align 16 %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
+  ret <4 x double> %v0
+
+; CHECK-LABEL: @test2ln
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2la(<4 x double>* align 32 %h, <4 x double> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+  %hv = bitcast <4 x double>* %h1 to i8*
+  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
+  ret <4 x double> %v0
+
+; CHECK-LABEL: @test2la
+; CHECK-NOT: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/PowerPC/lit.local.cfg b/test/Transforms/InstCombine/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..5d33887ff0a4
--- /dev/null
+++ b/test/Transforms/InstCombine/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll b/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
new file mode 100644
index 000000000000..ad264fb15b31
--- /dev/null
+++ b/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
@@ -0,0 +1,44 @@
+; Verify that we can create unaligned loads and stores from VSX intrinsics.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = common global <4 x float> zeroinitializer, align 1
+@res_vf = common global <4 x float> zeroinitializer, align 1
+@vd = common global <2 x double> zeroinitializer, align 1
+@res_vd = common global <2 x double> zeroinitializer, align 1
+
+define void @test1() {
+entry:
+  %t1 = alloca <4 x float>*, align 8
+  %t2 = alloca <2 x double>*, align 8
+  store <4 x float>* @vf, <4 x float>** %t1, align 8
+  %0 = load <4 x float>*, <4 x float>** %t1, align 8
+  %1 = bitcast <4 x float>* %0 to i8*
+  %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
+  store <4 x float>* @res_vf, <4 x float>** %t1, align 8
+  %3 = load <4 x float>*, <4 x float>** %t1, align 8
+  %4 = bitcast <4 x float>* %3 to i8*
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
+  store <2 x double>* @vd, <2 x double>** %t2, align 8
+  %5 = load <2 x double>*, <2 x double>** %t2, align 8
+  %6 = bitcast <2 x double>* %5 to i8*
+  %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
+  store <2 x double>* @res_vd, <2 x double>** %t2, align 8
+  %8 = load <2 x double>*, <2 x double>** %t2, align 8
+  %9 = bitcast <2 x double>* %8 to i8*
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
+  ret void
+}
+
+; CHECK-LABEL: @test1
+; CHECK: %0 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
+; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
+; CHECK: %1 = load <2 x double>, <2 x double>* @vd, align 1
+; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
+
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
diff --git a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
new file mode 100644
index 000000000000..fde0692d00a2
--- /dev/null
+++ b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; The test checks the folding of cmp(sub(a,b),0) into cmp(a,b).
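+; The fold is legal only when the fsub is known not to see infinities (the
+; ninf flag below): for a = b = +inf, sub(a, b) is NaN while cmp(a, b) compares
+; two equal operands, so the two forms could produce different mask bits. The
+; 'safe' variant, whose fsub carries no fast-math flags, must therefore keep
+; its fsub and compare against zero.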
+ +define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){ +; CHECK-LABEL: @sub_compare_foldingPD128_safe( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.safe = fsub <2 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.safe , <2 x double> zeroinitializer, i32 5, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){ +; CHECK-LABEL: @sub_compare_foldingPD128( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i = fsub ninf <2 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i , <2 x double> zeroinitializer, i32 5, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){ +; CHECK-LABEL: @sub_compare_foldingPD256( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i1 = fsub ninf <4 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){ +; CHECK-LABEL: @sub_compare_foldingPD512( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i8 -1, i32 4) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i2 = fsub ninf <8 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i8 -1, i32 4) + ret i8 %0 +} + + +define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){ +; CHECK-LABEL: @sub_compare_foldingPS128( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i3 = fsub ninf <4 x float> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){ +; CHECK-LABEL: @sub_compare_foldingPS256( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i4 = fsub ninf <8 x float> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5, i8 -1) + ret i8 %0 +} + + +define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){ +; CHECK-LABEL: @sub_compare_foldingPS512( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i16 -1, i32 4) +; CHECK-NEXT: ret i16 [[TMP0]] +; +entry: + %sub.i5 = fsub ninf <16 x float> %a, %b + %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %sub.i5, <16 x float> 
zeroinitializer, i32 11, i16 -1, i32 4) + ret i16 %0 +} + + + +define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){ +; CHECK-LABEL: @sub_compare_folding_swapPD128( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i = fsub ninf <2 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){ +; CHECK-LABEL: @sub_compare_folding_swapPD256( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i = fsub ninf <4 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){ +; CHECK-LABEL: @sub_compare_folding_swapPD512( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i8 -1, i32 4) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i = fsub ninf <8 x double> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i8 -1, i32 4) + ret i8 %0 +} + + +define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){ +; CHECK-LABEL: @sub_compare_folding_swapPS128( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i = fsub ninf <4 x float> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12, i8 -1) + ret i8 %0 +} + + +define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){ +; CHECK-LABEL: @sub_compare_folding_swapPS256( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + %sub.i = fsub ninf <8 x float> %a, %b + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5, i8 -1) + ret i8 %0 +} + + +define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){ +; CHECK-LABEL: @sub_compare_folding_swapPS512( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i16 -1, i32 4) +; CHECK-NEXT: ret i16 [[TMP0]] +; +entry: + %sub.i = fsub ninf <16 x float> %a, %b + %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i16 -1, i32 4) + ret i16 %0 +} + +declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8) +declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8) +declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32) +declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8) +declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8) +declare i16 
@llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32) diff --git a/test/Transforms/InstCombine/X86/blend_x86.ll b/test/Transforms/InstCombine/X86/blend_x86.ll new file mode 100644 index 000000000000..39ceb0186efe --- /dev/null +++ b/test/Transforms/InstCombine/X86/blend_x86.ll @@ -0,0 +1,151 @@ +; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s + +define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) { +; CHECK-LABEL: @constant_blendvpd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %ab, <2 x double> %xy, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> ) + ret <2 x double> %1 +} + +define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) { +; CHECK-LABEL: @constant_blendvpd_zero +; CHECK-NEXT: ret <2 x double> %xy + %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer) + ret <2 x double> %1 +} + +define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) { +; CHECK-LABEL: @constant_blendvpd_dup +; CHECK-NEXT: ret <2 x double> %xy + %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel) + ret <2 x double> %1 +} + +define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) { +; CHECK-LABEL: @constant_blendvps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %abcd, <4 x float> %xyzw, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> ) + ret <4 x float> %1 +} + +define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) { +; CHECK-LABEL: @constant_blendvps_zero +; CHECK-NEXT: ret <4 x float> %xyzw + %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer) + ret <4 x float> %1 +} + +define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) { +; CHECK-LABEL: @constant_blendvps_dup +; CHECK-NEXT: ret <4 x float> %xyzw + %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel) + ret <4 x float> %1 +} + +define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) { +; CHECK-LABEL: @constant_pblendvb( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %abcd, <16 x i8> %xyzw, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) { +; CHECK-LABEL: @constant_pblendvb_zero +; CHECK-NEXT: ret <16 x i8> %xyzw + %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer) + ret <16 x i8> %1 +} + +define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) { +; CHECK-LABEL: @constant_pblendvb_dup +; CHECK-NEXT: ret <16 x i8> %xyzw + %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel) + ret <16 x i8> %1 +} + +define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { +; CHECK-LABEL: @constant_blendvpd_avx( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %ab, <4 x double> %xy, <4 x i32> +; CHECK-NEXT: ret <4 x double> 
[[TMP1]] +; + %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> ) + ret <4 x double> %1 +} + +define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) { +; CHECK-LABEL: @constant_blendvpd_avx_zero +; CHECK-NEXT: ret <4 x double> %xy + %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer) + ret <4 x double> %1 +} + +define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) { +; CHECK-LABEL: @constant_blendvpd_avx_dup +; CHECK-NEXT: ret <4 x double> %xy + %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel) + ret <4 x double> %1 +} + +define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) { +; CHECK-LABEL: @constant_blendvps_avx( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %abcd, <8 x float> %xyzw, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> ) + ret <8 x float> %1 +} + +define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) { +; CHECK-LABEL: @constant_blendvps_avx_zero +; CHECK-NEXT: ret <8 x float> %xyzw + %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer) + ret <8 x float> %1 +} + +define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) { +; CHECK-LABEL: @constant_blendvps_avx_dup +; CHECK-NEXT: ret <8 x float> %xyzw + %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel) + ret <8 x float> %1 +} + +define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) { +; CHECK-LABEL: @constant_pblendvb_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %abcd, <32 x i8> %xyzw, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, + <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) { +; CHECK-LABEL: @constant_pblendvb_avx2_zero +; CHECK-NEXT: ret <32 x i8> %xyzw + %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer) + ret <32 x i8> %1 +} + +define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) { +; CHECK-LABEL: @constant_pblendvb_avx2_dup +; CHECK-NEXT: ret <32 x i8> %xyzw + %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel) + ret <32 x i8> %1 +} + +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) + +declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) diff --git a/test/Transforms/InstCombine/X86/lit.local.cfg b/test/Transforms/InstCombine/X86/lit.local.cfg new file mode 100644 index 000000000000..c8625f4d9d24 --- /dev/null +++ b/test/Transforms/InstCombine/X86/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + 
config.unsupported = True diff --git a/test/Transforms/InstCombine/X86/pr2645-1.ll b/test/Transforms/InstCombine/X86/pr2645-1.ll new file mode 100644 index 000000000000..2986d21866bf --- /dev/null +++ b/test/Transforms/InstCombine/X86/pr2645-1.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -instcombine -S | grep shufflevector +; PR2645 + +; instcombine shouldn't delete the shufflevector. + +define internal void @""(i8*, i32, i8*) { +;