diff options
Diffstat (limited to 'test/Transforms')
25 files changed, 427 insertions, 130 deletions
diff --git a/test/Transforms/EliminateAvailableExternally/visibility.ll b/test/Transforms/EliminateAvailableExternally/visibility.ll new file mode 100644 index 0000000000000..9966fcf30e85d --- /dev/null +++ b/test/Transforms/EliminateAvailableExternally/visibility.ll @@ -0,0 +1,11 @@ +; RUN: opt -elim-avail-extern -S < %s | FileCheck %s + +; CHECK: declare hidden void @f() +define available_externally hidden void @f() { + ret void +} + +define void @g() { + call void @f() + ret void +} diff --git a/test/Transforms/GVN/pre-new-inst.ll b/test/Transforms/GVN/pre-new-inst.ll new file mode 100644 index 0000000000000..238b8a687cccc --- /dev/null +++ b/test/Transforms/GVN/pre-new-inst.ll @@ -0,0 +1,29 @@ +; RUN: opt -basicaa -gvn -S %s | FileCheck %s + +%MyStruct = type { i32, i32 } +define i8 @foo(i64 %in, i8* %arr) { + %addr = alloca %MyStruct + %dead = trunc i64 %in to i32 + br i1 undef, label %next, label %tmp + +tmp: + call void @bar() + br label %next + +next: + %addr64 = bitcast %MyStruct* %addr to i64* + store i64 %in, i64* %addr64 + br label %final + +final: + %addr32 = getelementptr %MyStruct, %MyStruct* %addr, i32 0, i32 0 + %idx32 = load i32, i32* %addr32 + +; CHECK: %resptr = getelementptr i8, i8* %arr, i32 %dead + %resptr = getelementptr i8, i8* %arr, i32 %idx32 + %res = load i8, i8* %resptr + + ret i8 %res +} + +declare void @bar() diff --git a/test/Transforms/IndVarSimplify/lrev-existing-umin.ll b/test/Transforms/IndVarSimplify/lrev-existing-umin.ll new file mode 100644 index 0000000000000..961c9fd944d93 --- /dev/null +++ b/test/Transforms/IndVarSimplify/lrev-existing-umin.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) { +; CHECK-LABEL: @f( +not_zero11.preheader: + %tmp13 = icmp ugt i32 %length.i, %length.i.88 + %tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i + %tmp15 = icmp sgt i32 %tmp14, 0 + br i1 %tmp15, label %not_zero11, label %not_zero11.postloop + +not_zero11: + %v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ] + %tmp16 = zext i32 %v_1 to i64 + %tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16 + %tmp18 = load i8, i8* %tmp17, align 1 + %tmp19 = zext i8 %tmp18 to i32 + %tmp20 = or i32 %tmp19, %tmp10 + %tmp21 = trunc i32 %tmp20 to i8 + %addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16 + store i8 %tmp21, i8* %addr22, align 1 + %tmp22 = add nuw nsw i32 %v_1, 1 + %tmp23 = icmp slt i32 %tmp22, %tmp14 + br i1 %tmp23, label %not_zero11, label %main.exit.selector + +main.exit.selector: +; CHECK-LABEL: main.exit.selector: +; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i + %tmp24 = icmp slt i32 %tmp22, %length.i + br i1 %tmp24, label %not_zero11.postloop, label %leave + +leave: + ret void + +not_zero11.postloop: + ret void +} diff --git a/test/Transforms/Inline/frameescape.ll b/test/Transforms/Inline/frameescape.ll index fb336024f9375..6615fe9a76e4d 100644 --- a/test/Transforms/Inline/frameescape.ll +++ b/test/Transforms/Inline/frameescape.ll @@ -1,13 +1,13 @@ ; RUN: opt -inline -S < %s | FileCheck %s -; PR23216: We can't inline functions using llvm.frameescape. +; PR23216: We can't inline functions using llvm.localescape. -declare void @llvm.frameescape(...) +declare void @llvm.localescape(...) declare i8* @llvm.frameaddress(i32) -declare i8* @llvm.framerecover(i8*, i8*, i32) +declare i8* @llvm.localrecover(i8*, i8*, i32) define internal void @foo(i8* %fp) { - %a.i8 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @bar to i8*), i8* %fp, i32 0) + %a.i8 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @bar to i8*), i8* %fp, i32 0) %a = bitcast i8* %a.i8 to i32* store i32 42, i32* %a ret void @@ -16,7 +16,7 @@ define internal void @foo(i8* %fp) { define internal i32 @bar() { entry: %a = alloca i32 - call void (...) @llvm.frameescape(i32* %a) + call void (...) @llvm.localescape(i32* %a) %fp = call i8* @llvm.frameaddress(i32 0) tail call void @foo(i8* %fp) %r = load i32, i32* %a @@ -27,7 +27,7 @@ entry: define internal i32 @bar_alwaysinline() alwaysinline { entry: %a = alloca i32 - call void (...) @llvm.frameescape(i32* %a) + call void (...) @llvm.localescape(i32* %a) tail call void @foo(i8* null) ret i32 0 } diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll index ee98a01201795..15f3096105bbd 100644 --- a/test/Transforms/InstCombine/align-external.ll +++ b/test/Transforms/InstCombine/align-external.ll @@ -3,16 +3,14 @@ ; Don't assume that external global variables or those with weak linkage have ; their preferred alignment. They may only have the ABI minimum alignment. -; CHECK: %s = shl i64 %a, 3 -; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64) -; CHECK: %q = add i64 %r, 1 -; CHECK: ret i64 %q - target datalayout = "i32:8:32" @A = external global i32 @B = weak_odr global i32 0 +@C = available_externally global <4 x i32> zeroinitializer, align 4 +; CHECK: @C = available_externally global <4 x i32> zeroinitializer, align 4 + define i64 @foo(i64 %a) { %t = ptrtoint i32* @A to i64 %s = shl i64 %a, 3 @@ -21,9 +19,23 @@ define i64 @foo(i64 %a) { ret i64 %q } +; CHECK-LABEL: define i64 @foo(i64 %a) +; CHECK: %s = shl i64 %a, 3 +; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64) +; CHECK: %q = add i64 %r, 1 +; CHECK: ret i64 %q + define i32 @bar() { -; CHECK-LABEL: @bar( %r = load i32, i32* @B, align 1 -; CHECK: align 1 ret i32 %r } + +; CHECK-LABEL: @bar() +; CHECK: align 1 + +define void @vec_store() { + store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* @C, align 4 + ret void +} +; CHECK: define void @vec_store() +; CHECK: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* @C, align 4 diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index 9767704c85cfd..bea063787a751 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -17,6 +17,8 @@ declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone +declare double @llvm.cos.f64(double %Val) nounwind readonly +declare double @llvm.sin.f64(double %Val) nounwind readonly define i8 @uaddtest1(i8 %A, i8 %B) { %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B) @@ -425,3 +427,23 @@ define %ov.result.32 @never_overflows_ssub_test0(i32 %a) { ; CHECK-NEXT: %[[x:.*]] = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %a, 0 ; CHECK-NEXT: ret %ov.result.32 %[[x]] } + +define void @cos(double *%P) { +entry: + %B = tail call double @llvm.cos.f64(double 0.0) nounwind + store volatile double %B, double* %P + + ret void +; CHECK-LABEL: @cos( +; CHECK: store volatile double 1.000000e+00, double* %P +} + +define void @sin(double *%P) { +entry: + %B = tail call double @llvm.sin.f64(double 0.0) nounwind + store volatile double %B, double* %P + + ret void +; CHECK-LABEL: @sin( +; CHECK: store volatile double 0.000000e+00, double* %P +} diff --git a/test/Transforms/InstCombine/load-combine-metadata.ll b/test/Transforms/InstCombine/load-combine-metadata.ll new file mode 100644 index 0000000000000..9b9c1fe607b90 --- /dev/null +++ b/test/Transforms/InstCombine/load-combine-metadata.ll @@ -0,0 +1,29 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @test_load_load_combine_metadata( +; Check that range and AA metadata is combined +; CHECK: %[[V:.*]] = load i32, i32* %0 +; CHECK-SAME: !tbaa !{{[0-9]+}} +; CHECK-SAME: !range ![[RANGE:[0-9]+]] +; CHECK: store i32 %[[V]], i32* %1 +; CHECK: store i32 %[[V]], i32* %2 +define void @test_load_load_combine_metadata(i32*, i32*, i32*) { + %a = load i32, i32* %0, !tbaa !8, !range !0, !alias.scope !5, !noalias !6 + %b = load i32, i32* %0, !tbaa !8, !range !1 + store i32 %a, i32* %1 + store i32 %b, i32* %2 + ret void +} + +; CHECK: ![[RANGE]] = !{i32 0, i32 1, i32 8, i32 9} +!0 = !{ i32 0, i32 1 } +!1 = !{ i32 8, i32 9 } +!2 = !{!2} +!3 = !{!3, !2} +!4 = !{!4, !2} +!5 = !{!3} +!6 = !{!4} +!7 = !{ !"tbaa root" } +!8 = !{ !7, !7, i64 0 } diff --git a/test/Transforms/InstCombine/load_combine_aa.ll b/test/Transforms/InstCombine/load_combine_aa.ll new file mode 100644 index 0000000000000..b84b81ddd5d96 --- /dev/null +++ b/test/Transforms/InstCombine/load_combine_aa.ll @@ -0,0 +1,15 @@ +; RUN: opt -basicaa -instcombine -S < %s | FileCheck %s + +; CHECK-LABEL: @test_load_combine_aa( +; CHECK: %[[V:.*]] = load i32, i32* %0 +; CHECK: store i32 0, i32* %3 +; CHECK: store i32 %[[V]], i32* %1 +; CHECK: store i32 %[[V]], i32* %2 +define void @test_load_combine_aa(i32*, i32*, i32*, i32* noalias) { + %a = load i32, i32* %0 + store i32 0, i32* %3 + %b = load i32, i32* %0 + store i32 %a, i32* %1 + store i32 %b, i32* %2 + ret void +} diff --git a/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll b/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll index 885cb70007e67..7e391aba30456 100644 --- a/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll +++ b/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll @@ -27,3 +27,12 @@ define { i8, i32 } @test2({ i8*, i32 } %x) { ret { i8, i32 } %ins ; CHECK-LABEL: @test2( } + +define i32 @test3(i32 %a, float %b) { + %agg1 = insertvalue {i32, float} undef, i32 %a, 0 + %agg2 = insertvalue {i32, float} %agg1, float %b, 1 + %ev = extractvalue {i32, float} %agg2, 0 + ret i32 %ev +; CHECK-LABEL: @test3( +; CHECK: ret i32 %a +} diff --git a/test/Transforms/InstSimplify/floating-point-compare.ll b/test/Transforms/InstSimplify/floating-point-compare.ll index af48d062b4f64..8174f58345332 100644 --- a/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/test/Transforms/InstSimplify/floating-point-compare.ll @@ -58,3 +58,18 @@ define i1 @orderedLessZeroPowi(double,double) { ret i1 %olt } +define i1 @nonans1(double %in1, double %in2) { + %cmp = fcmp nnan uno double %in1, %in2 + ret i1 %cmp + +; CHECK-LABEL: @nonans1 +; CHECK-NEXT: ret i1 false +} + +define i1 @nonans2(double %in1, double %in2) { + %cmp = fcmp nnan ord double %in1, %in2 + ret i1 %cmp + +; CHECK-LABEL: @nonans2 +; CHECK-NEXT: ret i1 true +} diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll index f1f0b037fdbd1..d75dc364243cd 100644 --- a/test/Transforms/InstSimplify/undef.ll +++ b/test/Transforms/InstSimplify/undef.ll @@ -265,3 +265,17 @@ define i32 @test34(i32 %a) { %b = lshr i32 undef, 0 ret i32 %b } + +; CHECK-LABEL: @test35 +; CHECK: ret i32 undef +define i32 @test35(<4 x i32> %V) { + %b = extractelement <4 x i32> %V, i32 4 + ret i32 %b +} + +; CHECK-LABEL: @test36 +; CHECK: ret i32 undef +define i32 @test36(i32 %V) { + %b = extractelement <4 x i32> undef, i32 %V + ret i32 %b +} diff --git a/test/Transforms/LICM/PR24013.ll b/test/Transforms/LICM/PR24013.ll new file mode 100644 index 0000000000000..4557bfcfd1226 --- /dev/null +++ b/test/Transforms/LICM/PR24013.ll @@ -0,0 +1,19 @@ +; RUN: opt -licm -S < %s | FileCheck %s + +define void @f(i1 zeroext %p1) { +; CHECK-LABEL: @f( +entry: + br label %lbl + +lbl.loopexit: ; No predecessors! + br label %lbl + +lbl: ; preds = %lbl.loopexit, %entry + %phi = phi i32 [ %conv, %lbl.loopexit ], [ undef, %entry ] +; CHECK: phi i32 [ undef, {{.*}} ], [ undef + br label %if.then.5 + +if.then.5: ; preds = %if.then.5, %lbl + %conv = zext i1 undef to i32 + br label %if.then.5 +} diff --git a/test/Transforms/LoopDistribute/basic-with-memchecks.ll b/test/Transforms/LoopDistribute/basic-with-memchecks.ll index fde06d33c5a5c..3aced48504111 100644 --- a/test/Transforms/LoopDistribute/basic-with-memchecks.ll +++ b/test/Transforms/LoopDistribute/basic-with-memchecks.ll @@ -32,8 +32,9 @@ entry: %e = load i32*, i32** @E, align 8 br label %for.body -; We have two compares for each array overlap check which is a total of 10 -; compares. +; We have two compares for each array overlap check. +; Since the checks to A and A + 4 get merged, this will give us a +; total of 8 compares. ; ; CHECK: for.body.lver.memcheck: ; CHECK: = icmp @@ -48,9 +49,6 @@ entry: ; CHECK: = icmp ; CHECK: = icmp -; CHECK: = icmp -; CHECK: = icmp - ; CHECK-NOT: = icmp ; CHECK: br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph.ldist1 diff --git a/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll b/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll new file mode 100644 index 0000000000000..ddb7bdbe7d191 --- /dev/null +++ b/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll @@ -0,0 +1,34 @@ +; RUN: opt -loop-idiom -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios8.0.0" + +; When we replace the precondition with a ctpop, we need to ensure +; that only the first branch reads the ctpop. The store prior +; to that should continue to read from the original compare. + +; CHECK: %tobool.5 = icmp ne i32 %num, 0 +; CHECK: store i1 %tobool.5, i1* %ptr + +define internal fastcc i32 @num_bits_set(i32 %num, i1* %ptr) #1 { +entry: + %tobool.5 = icmp ne i32 %num, 0 + store i1 %tobool.5, i1* %ptr + br i1 %tobool.5, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %count.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %num.addr.06 = phi i32 [ %num, %for.body.lr.ph ], [ %and, %for.body ] + %sub = add i32 %num.addr.06, -1 + %and = and i32 %sub, %num.addr.06 + %inc = add nsw i32 %count.07, 1 + %tobool = icmp ne i32 %and, 0 + br i1 %tobool, label %for.body, label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %count.0.lcssa = phi i32 [ %inc, %for.body ], [ 0, %entry ] + ret i32 %count.0.lcssa +}
\ No newline at end of file diff --git a/test/Transforms/LoopRotate/oz-disable.ll b/test/Transforms/LoopRotate/oz-disable.ll new file mode 100644 index 0000000000000..7a6a9bf33a188 --- /dev/null +++ b/test/Transforms/LoopRotate/oz-disable.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: opt < %s -S -Os -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS +; RUN: opt < %s -S -Oz -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ + +; Loop should be rotated for -Os but not for -Oz. +; OS: rotating Loop at depth 1 +; OZ-NOT: rotating Loop at depth 1 + +@e = global i32 10 + +declare void @use(i32) + +define void @test() { +entry: + %end = load i32, i32* @e + br label %loop + +loop: + %n.phi = phi i32 [ %n, %loop.fin ], [ 0, %entry ] + %cond = icmp eq i32 %n.phi, %end + br i1 %cond, label %exit, label %loop.fin + +loop.fin: + %n = add i32 %n.phi, 1 + call void @use(i32 %n) + br label %loop + +exit: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/ephemeral.ll b/test/Transforms/LoopStrengthReduce/ephemeral.ll new file mode 100644 index 0000000000000..a0d1d44b1bac4 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/ephemeral.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" + +; for (int i = 0; i < n; ++i) { +; use(i * 5 + 3); +; // i * a + b is ephemeral and shouldn't be promoted by LSR +; __builtin_assume(i * a + b >= 0); +; } +define void @ephemeral(i32 %a, i32 %b, i32 %n) { +; CHECK-LABEL: @ephemeral( +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + ; Only i and i * 5 + 3 should be indvars, not i * a + b. +; CHECK: phi i32 +; CHECK: phi i32 +; CHECK-NOT: phi i32 + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, %n + + %0 = mul nsw i32 %i, 5 + %1 = add nsw i32 %0, 3 + call void @use(i32 %1) + + %2 = mul nsw i32 %i, %a + %3 = add nsw i32 %2, %b + %4 = icmp sgt i32 %3, -1 + call void @llvm.assume(i1 %4) + + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +declare void @use(i32) + +declare void @llvm.assume(i1) diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index 1354181becd0c..8e0d77513cc10 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -86,9 +86,9 @@ for.end: ; preds = %for.body ; #pragma clang loop unroll(full) ; Loop should be fully unrolled. ; -; CHECK-LABEL: @loop64_with_enable( +; CHECK-LABEL: @loop64_with_full( ; CHECK-NOT: br i1 -define void @loop64_with_enable(i32* nocapture %a) { +define void @loop64_with_full(i32* nocapture %a) { entry: br label %for.body @@ -139,14 +139,13 @@ for.end: ; preds = %for.body !6 = !{!"llvm.loop.unroll.count", i32 4} ; #pragma clang loop unroll(full) -; Full unrolling is requested, but loop has a dynamic trip count so +; Full unrolling is requested, but loop has a runtime trip count so ; no unrolling should occur. ; -; CHECK-LABEL: @dynamic_loop_with_enable( +; CHECK-LABEL: @runtime_loop_with_full( ; CHECK: store i32 ; CHECK-NOT: store i32 -; CHECK: br i1 -define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) { +define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 @@ -168,22 +167,22 @@ for.end: ; preds = %for.body, %entry !8 = !{!8, !4} ; #pragma clang loop unroll_count(4) -; Loop has a dynamic trip count. Unrolling should occur, but no -; conditional branches can be removed. +; Loop has a runtime trip count. Runtime unrolling should occur and loop +; should be duplicated (original and 4x unrolled). ; -; CHECK-LABEL: @dynamic_loop_with_count4( +; CHECK-LABEL: @runtime_loop_with_count4( +; CHECK: for.body.prol: +; CHECK: store ; CHECK-NOT: store ; CHECK: br i1 +; CHECK: for.body ; CHECK: store -; CHECK: br i1 ; CHECK: store -; CHECK: br i1 ; CHECK: store -; CHECK: br i1 ; CHECK: store +; CHECK-NOT: store ; CHECK: br i1 -; CHECK-NOT: br i1 -define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) { +define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9 diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll index 1c21748d8bdd1..8640950be32e5 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -9,9 +9,9 @@ ; DEBUG-OUTPUT-NOT: .loc ; DEBUG-OUTPUT-NOT: {{.*}}.debug_info -; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1) -; UNROLLED: remark: vectorization-remarks.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial) ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1 +; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 1) +; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved by 4 (vectorization not beneficial) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll b/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll index a6ee5ee078fba..c4e250957a8f6 100644 --- a/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll +++ b/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll @@ -1,17 +1,17 @@ ; RUN: opt %s -S -place-safepoints | FileCheck %s -declare void @llvm.frameescape(...) +declare void @llvm.localescape(...) -; Do we insert the entry safepoint after the frameescape intrinsic? +; Do we insert the entry safepoint after the localescape intrinsic? define void @parent() gc "statepoint-example" { ; CHECK-LABEL: @parent entry: ; CHECK-LABEL: entry ; CHECK-NEXT: alloca -; CHECK-NEXT: frameescape +; CHECK-NEXT: localescape ; CHECK-NEXT: statepoint %ptr = alloca i32 - call void (...) @llvm.frameescape(i32* %ptr) + call void (...) @llvm.localescape(i32* %ptr) ret void } diff --git a/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll index 9ed86f8814739..35763953911b5 100644 --- a/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll +++ b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll @@ -1,4 +1,9 @@ ; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s +; XFAIL: * +; +; FIXME: If this test expects to be vectorized, the TTI must indicate that the target +; has vector registers of the expected width. +; Currently, it says there are 8 vector registers that are 32-bits wide. target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll index 9f56e2195991e..8d25b3661dc37 100644 --- a/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/test/Transforms/SLPVectorizer/X86/cse.ll @@ -12,11 +12,8 @@ target triple = "i386-apple-macosx10.8.0" ;CHECK-LABEL: @test( ;CHECK: load <2 x double> -;CHECK: fadd <2 x double> -;CHECK: store <2 x double> -;CHECK: insertelement <2 x double> -;CHECK: fadd <2 x double> -;CHECK: store <2 x double> +;CHECK: fadd <4 x double> +;CHECK: store <4 x double> ;CHECK: ret i32 define i32 @test(double* nocapture %G) { @@ -48,11 +45,12 @@ entry: ; A[2] = A[2] * 7.6 * n + 3.0; ; A[3] = A[3] * 7.4 * n + 4.0; ;} -;CHECK-LABEL: @foo( -;CHECK: insertelement <2 x double> -;CHECK: insertelement <2 x double> -;CHECK-NOT: insertelement <2 x double> -;CHECK: ret +; CHECK-LABEL: @foo( +; CHECK: load <4 x double> +; CHECK: fmul <4 x double> +; CHECK: fmul <4 x double> +; CHECK: fadd <4 x double> +; CHECK: store <4 x double> define i32 @foo(double* nocapture %A, i32 %n) { entry: %0 = load double, double* %A, align 8 @@ -93,7 +91,7 @@ entry: ; } ; We can't merge the gather sequences because one does not dominate the other. -; CHECK: test2 +; CHECK-LABEL: @test2( ; CHECK: insertelement ; CHECK: insertelement ; CHECK: insertelement @@ -140,11 +138,12 @@ define i32 @test2(double* nocapture %G, i32 %k) { ; A[2] = A[2] * 7.9 * n + 6.0; ; A[3] = A[3] * 7.9 * n + 6.0; ;} -;CHECK-LABEL: @foo4( -;CHECK: insertelement <2 x double> -;CHECK: insertelement <2 x double> -;CHECK-NOT: insertelement <2 x double> -;CHECK: ret +; CHECK-LABEL: @foo4( +; CHECK: load <4 x double> +; CHECK: fmul <4 x double> +; CHECK: fmul <4 x double> +; CHECK: fadd <4 x double> +; CHECK: store <4 x double> define i32 @foo4(double* nocapture %A, i32 %n) { entry: %0 = load double, double* %A, align 8 diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll index 3f952d7b242b2..d10f2b6015d4a 100644 --- a/test/Transforms/SLPVectorizer/X86/gep.ll +++ b/test/Transforms/SLPVectorizer/X86/gep.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" ; Test if SLP can handle GEP expressions. ; The test perform the following action: diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll index 0c16c34a18882..dace4b35b8711 100644 --- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -1,22 +1,19 @@ -; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" ;CHECK-LABEL: @foo( -;CHECK: load <4 x i32> -;CHECK: add nsw <4 x i32> -;CHECK: store <4 x i32> -;CHECK: load <4 x i32> -;CHECK: add nsw <4 x i32> -;CHECK: store <4 x i32> +;CHECK: load <8 x i32> +;CHECK: add nsw <8 x i32> +;CHECK: store <8 x i32> ;CHECK: ret -define i32 @foo(i32* nocapture %A, i32 %n) #0 { +define i32 @foo(i32* nocapture %A, i32 %n) { entry: %cmp62 = icmp sgt i32 %n, 0 br i1 %cmp62, label %for.body, label %for.end -for.body: ; preds = %entry, %for.body +for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv %0 = load i32, i32* %arrayidx, align 4 @@ -62,8 +59,7 @@ for.body: ; preds = %entry, %for.body %cmp = icmp slt i32 %15, %n br i1 %cmp, label %for.body, label %for.end -for.end: ; preds = %for.body, %entry +for.end: ret i32 undef } -attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/SLPVectorizer/X86/pr19657.ll b/test/Transforms/SLPVectorizer/X86/pr19657.ll index a687aec76103c..32f8da4c7ee03 100644 --- a/test/Transforms/SLPVectorizer/X86/pr19657.ll +++ b/test/Transforms/SLPVectorizer/X86/pr19657.ll @@ -1,73 +1,45 @@ -; RUN: opt < %s -O1 -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -basicaa -slp-vectorizer -slp-max-reg-size=128 -S -mcpu=corei7-avx | FileCheck %s --check-prefix=V128 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -;CHECK: load <2 x double>, <2 x double>* -;CHECK: fadd <2 x double> -;CHECK: store <2 x double> +; CHECK-LABEL: @foo( +; CHECK: load <4 x double> +; CHECK: fadd <4 x double> +; CHECK: fadd <4 x double> +; CHECK: store <4 x double> -; Function Attrs: nounwind uwtable -define void @foo(double* %x) #0 { - %1 = alloca double*, align 8 - store double* %x, double** %1, align 8 - %2 = load double*, double** %1, align 8 - %3 = getelementptr inbounds double, double* %2, i64 0 - %4 = load double, double* %3, align 8 - %5 = load double*, double** %1, align 8 - %6 = getelementptr inbounds double, double* %5, i64 0 - %7 = load double, double* %6, align 8 - %8 = fadd double %4, %7 - %9 = load double*, double** %1, align 8 - %10 = getelementptr inbounds double, double* %9, i64 0 - %11 = load double, double* %10, align 8 - %12 = fadd double %8, %11 - %13 = load double*, double** %1, align 8 - %14 = getelementptr inbounds double, double* %13, i64 0 - store double %12, double* %14, align 8 - %15 = load double*, double** %1, align 8 - %16 = getelementptr inbounds double, double* %15, i64 1 - %17 = load double, double* %16, align 8 - %18 = load double*, double** %1, align 8 - %19 = getelementptr inbounds double, double* %18, i64 1 - %20 = load double, double* %19, align 8 - %21 = fadd double %17, %20 - %22 = load double*, double** %1, align 8 - %23 = getelementptr inbounds double, double* %22, i64 1 - %24 = load double, double* %23, align 8 - %25 = fadd double %21, %24 - %26 = load double*, double** %1, align 8 - %27 = getelementptr inbounds double, double* %26, i64 1 - store double %25, double* %27, align 8 - %28 = load double*, double** %1, align 8 - %29 = getelementptr inbounds double, double* %28, i64 2 - %30 = load double, double* %29, align 8 - %31 = load double*, double** %1, align 8 - %32 = getelementptr inbounds double, double* %31, i64 2 - %33 = load double, double* %32, align 8 - %34 = fadd double %30, %33 - %35 = load double*, double** %1, align 8 - %36 = getelementptr inbounds double, double* %35, i64 2 - %37 = load double, double* %36, align 8 - %38 = fadd double %34, %37 - %39 = load double*, double** %1, align 8 - %40 = getelementptr inbounds double, double* %39, i64 2 - store double %38, double* %40, align 8 - %41 = load double*, double** %1, align 8 - %42 = getelementptr inbounds double, double* %41, i64 3 - %43 = load double, double* %42, align 8 - %44 = load double*, double** %1, align 8 - %45 = getelementptr inbounds double, double* %44, i64 3 - %46 = load double, double* %45, align 8 - %47 = fadd double %43, %46 - %48 = load double*, double** %1, align 8 - %49 = getelementptr inbounds double, double* %48, i64 3 - %50 = load double, double* %49, align 8 - %51 = fadd double %47, %50 - %52 = load double*, double** %1, align 8 - %53 = getelementptr inbounds double, double* %52, i64 3 - store double %51, double* %53, align 8 +; V128-LABEL: @foo( +; V128: load <2 x double> +; V128: fadd <2 x double> +; V128: fadd <2 x double> +; V128: store <2 x double> +; V128: load <2 x double> +; V128: fadd <2 x double> +; V128: fadd <2 x double> +; V128: store <2 x double> + +define void @foo(double* %x) { + %1 = load double, double* %x, align 8 + %2 = fadd double %1, %1 + %3 = fadd double %2, %1 + store double %3, double* %x, align 8 + %4 = getelementptr inbounds double, double* %x, i64 1 + %5 = load double, double* %4, align 8 + %6 = fadd double %5, %5 + %7 = fadd double %6, %5 + store double %7, double* %4, align 8 + %8 = getelementptr inbounds double, double* %x, i64 2 + %9 = load double, double* %8, align 8 + %10 = fadd double %9, %9 + %11 = fadd double %10, %9 + store double %11, double* %8, align 8 + %12 = getelementptr inbounds double, double* %x, i64 3 + %13 = load double, double* %12, align 8 + %14 = fadd double %13, %13 + %15 = fadd double %14, %13 + store double %15, double* %12, align 8 ret void } -attributes #0 = { nounwind } diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index a59192d718cf4..7c8955b28fa2d 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1595,3 +1595,14 @@ entry: store i32 %load, i32* %a.gep1 ret void } + +define void @PR23737() { +; CHECK-LABEL: @PR23737( +; CHECK: store atomic volatile {{.*}} seq_cst +; CHECK: load atomic volatile {{.*}} seq_cst +entry: + %ptr = alloca i64, align 8 + store atomic volatile i64 0, i64* %ptr seq_cst, align 8 + %load = load atomic volatile i64, i64* %ptr seq_cst, align 8 + ret void +} |