diff options
Diffstat (limited to 'test/Transforms/LoopStrengthReduce')
27 files changed, 474 insertions, 38 deletions
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll index af3a53708b49f..ccf8ebdd5d13b 100644 --- a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll +++ b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll @@ -5,7 +5,7 @@ target triple = "x86-apple-darwin" ; Verify that identical edges are merged. rdar://problem/6453893 -; CHECK: @test1 +; CHECK-LABEL: @test1( ; CHECK: bb89: ; CHECK: phi i8* [ %lsr.iv.next1, %bbA.bb89_crit_edge ], [ %lsr.iv.next1, %bbB.bb89_crit_edge ]{{$}} @@ -43,7 +43,7 @@ exit: } ; Handle single-predecessor phis: PR13756 -; CHECK: @test2 +; CHECK-LABEL: @test2( ; CHECK: bb89: ; CHECK: phi i8* [ %lsr.iv.next1, %bbA ], [ %lsr.iv.next1, %bbA ], [ %lsr.iv.next1, %bbA ]{{$}} define i8* @test2() { diff --git a/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll b/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll index 1ee9bb409d917..83963e3126da9 100644 --- a/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll +++ b/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll @@ -5,7 +5,7 @@ target triple = "x86-apple-darwin" -; CHECK: @test +; CHECK-LABEL: @test( ; multiplies are hoisted out of the loop ; CHECK: while.body.lr.ph: ; CHECK: mul i64 diff --git a/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll b/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll index 4718529bfd507..484fefaad413c 100644 --- a/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll +++ b/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll @@ -8,7 +8,7 @@ target triple = "x86_64-apple-darwin" ; Verify that -loop-reduce runs without "hanging" and reuses post-inc ; expansions. 
-; CHECK: @test +; CHECK-LABEL: @test( ; CHECK: icmp ; CHECK: icmp ; CHECK: icmp diff --git a/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll b/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll index 60cc7a5163268..068b716651d83 100644 --- a/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll +++ b/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll @@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin" -; CHECK: @test +; CHECK-LABEL: @test( ; CHECK: phi ; CHECK-NOT: phi define void @test(i32 %rowStride) ssp align 2 { diff --git a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll index 392a8bcf89dbf..6c128feb541ca 100644 --- a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll +++ b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll @@ -9,7 +9,7 @@ target triple = "i386-unknown-freebsd10.0" @b = external global [121 x i32] -; CHECK: @vb +; CHECK-LABEL: @vb( ; Outer recurrence: ; CHECK: %lsr.iv1 = phi [121 x i32]* ; Inner recurrence: diff --git a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll index d7f5723188c2b..87dd39730ec1f 100644 --- a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll +++ b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll @@ -11,7 +11,7 @@ target triple = "i386-apple-darwin" ; cannot find a preheader, so they should be expanded in the loop header ; (bb7.lr.ph.us) below the existing phi i.12.us. ; Currently, LSR won't kick in on such loops. -; CHECK: @nopreheader +; CHECK-LABEL: @nopreheader( ; CHECK: bb7.us: ; CHECK-NOT: phi float* ; CHECK: %j.01.us = phi i32 @@ -54,7 +54,7 @@ return: ; preds = %bb9, %bb9.us, %bb10 ; In this case, SCEVExpander simply cannot materialize the AddRecExpr ; that LSR picks. We must detect that %bb8.preheader does not have a ; preheader and avoid performing LSR on %bb7. 
-; CHECK: @nopreheader2 +; CHECK-LABEL: @nopreheader2( ; CHECK: bb7: ; CHECK: %indvar = phi i32 define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind { diff --git a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll index 3036a7e38bbf4..94a037ec28eba 100644 --- a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll +++ b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll @@ -8,7 +8,7 @@ target triple = "x86_64-apple-darwin10.0.0" ; while.cond197 is a dominates the simplified loop while.cond238 but ; has no with no preheader. ; -; CHECK: @nopreheader +; CHECK-LABEL: @nopreheader( ; CHECK: %while.cond238 ; CHECK: phi i64 ; CHECK-NOT: phi diff --git a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll index 0172492edc997..5fa3838c82979 100644 --- a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll +++ b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-darwin10.0.0" ; IVUsers should not consider tmp128 a valid user because it is not in a ; simplified loop nest. -; CHECK: @nopreheader +; CHECK-LABEL: @nopreheader( ; CHECK: for.cond: ; CHECK: %tmp128 = add i64 %0, %indvar65 define void @nopreheader(i8* %cmd) nounwind ssp { diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll index 8bac639ae559d..ea1d65b1652e5 100644 --- a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll +++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin" @g_3 = global i32 0, align 4 ; Ensure that %div.i.i.us is not hoisted. 
-; CHECK: @main +; CHECK-LABEL: @main( ; CHECK: for.body.i.i.us: ; CHECK: %div.i.i.i.us ; CHECK: %cmp5.i.i.us diff --git a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll index bce234cd40664..8a5a0a4c5fcda 100644 --- a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll +++ b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" -; CHECK: @test +; CHECK-LABEL: @test( ; CHECK: bb8: ; CHECK-NEXT: phi i8 ; CHECK-NEXT: phi i8 diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll index 8fbddf8ae4c82..79dbf0d537031 100644 --- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -8,14 +8,14 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; Verify that nothing uses the "dead" ptrtoint from "undef". -; CHECK: @VerifyDiagnosticConsumerTest +; CHECK-LABEL: @VerifyDiagnosticConsumerTest( ; CHECK: bb: -; CHECK: %0 = ptrtoint i8* undef to i64 -; CHECK-NOT: %0 +; "dead" ptrtoint not emitted (or dead code eliminated) with +; current LSR cost model. 
+; CHECK-NOT: = ptrtoint i8* undef to i64 ; CHECK: .lr.ph -; CHECK-NOT: %0 -; CHECK: sub i64 %7, %tmp6 -; CHECK-NOT: %0 +; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1 +; CHECK: sub i64 [[TMP]], %tmp6 ; CHECK: ret void define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { bb: diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll index ee3cc4dd78fc6..ab7f20f0129b8 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll +++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll @@ -138,7 +138,7 @@ for.end: ; preds = %for.body, %entry ; Consequently, we should *not* form any chains. ; ; A9: foldedidx: -; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3] +; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3] define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp { entry: br label %for.body diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg index bac2ffab31d92..8a3ba96497e7d 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll index a932b47925865..2fe62e39fc93c 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll @@ -5,7 +5,7 @@ ; rdar://9786536 ; First, make sure LSR doesn't crash on an empty IVUsers list. -; CHECK: @dummyIV +; CHECK-LABEL: @dummyIV( ; CHECK-NOT: phi ; CHECK-NOT: sitofp ; CHECK: br @@ -24,7 +24,7 @@ for.end: } ; Now check that the computed double constant is correct. 
-; CHECK: @doubleIV +; CHECK-LABEL: @doubleIV( ; CHECK: phi double [ -3.900000e+01, %entry ] ; CHECK: br define void @doubleIV() nounwind { diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll index cb23ad01a497c..8053940df13f8 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll @@ -11,9 +11,9 @@ declare i1 @check() nounwind ; Check that LSR did something close to the behavior at the time of the bug. ; CHECK: @sqlite3DropTriggerPtr -; CHECK: incq %rax +; CHECK: incq %r{{[a-d]}}x ; CHECK: jne -; CHECK: decq %rax +; CHECK: decq %r{{[a-d]}}x ; CHECK: ret define i64 @sqlite3DropTriggerPtr() nounwind { bb: diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll index eedfc200f48ba..001a1d695c992 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-reduce -S | FileCheck %s ; -; Test LSR's ability to prune formulae that refer to nonexistant +; Test LSR's ability to prune formulae that refer to nonexistent ; AddRecs in other loops. 
; ; Unable to reduce this case further because it requires LSR to exceed @@ -14,7 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin" -; CHECK: @test +; CHECK-LABEL: @test( ; CHECK: for.body: ; CHECK: %lsr.iv ; CHECK-NOT: %dummyout diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg index da2db5a45f9c6..ba763cf03ffcc 100644 --- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll new file mode 100644 index 0000000000000..6333291aa65d0 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +; CHECK: bb1: +; CHECK: load double addrspace(1)* [[IV:%[^,]+]] +; CHECK: store double {{.*}}, double addrspace(1)* [[IV]] + +; CHECK-NOT: cast +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1 +; CHECK: br {{.*}} label %bb1 + +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* {{.*}}, i16 + + +; This test tests several things. The load and store should use the +; same address instead of having it computed twice, and SCEVExpander should +; be able to reconstruct the full getelementptr, despite it having a few +; obstacles set in its way. +; We only check that the inner loop (bb1-bb2) is "reduced" because LSR +; currently only operates on inner loops. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64" + +define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind { +entry: + %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + br i1 %tmp, label %bb.nph3, label %return + +bb.nph: ; preds = %bb2.preheader + %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb2, %bb.nph + %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3] + %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1] + %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1] + %z0 = add i64 %tmp3, 5203 + %tmp5 = getelementptr double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1] + %tmp6 = load double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1] + %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1] + %z1 = add i64 %tmp4, 5203 + %tmp8 = getelementptr double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1] + store double %tmp7, double addrspace(1)* %tmp8, align 8 + %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2] + br label %bb2 + +bb2: ; preds = %bb1 + %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1] + br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge + +bb2.bb3_crit_edge: ; preds = %bb2 + br label %bb3 + +bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge + %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2] + br label %bb4 + +bb4: ; preds = %bb3 + %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1] + br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge + +bb4.return_crit_edge: ; preds = %bb4 + br label %bb4.return_crit_edge.split + +bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge + br label %return + +bb.nph3: ; preds = %entry + %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1] + %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1] + %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1] + %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp18 = mul i64 %tmp17, %o ; 
<i64> [#uses=1] + %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1] + br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split + +bb.nph3.split: ; preds = %bb.nph3 + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph3.split, %bb4 + %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3] + br i1 true, label %bb.nph, label %bb3 + +return: ; preds = %bb4.return_crit_edge.split, %entry + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/test/Transforms/LoopStrengthReduce/address-space-loop.ll new file mode 100644 index 0000000000000..9c1b213b5979a --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/address-space-loop.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK-LABEL: @Z4( +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Hoist %t2 computation outside the loop. 
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. +; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll index ff8cab83137bd..3ba93ff748351 100644 --- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll +++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll @@ -68,3 +68,46 @@ bb7: catch i8* null ret void } + +; PR17425 +define void @i() { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %c.0 = phi i16* [ undef, %entry ], [ %incdec.ptr, %while.cond ] + %incdec.ptr = getelementptr inbounds i16* %c.0, i64 1 + br i1 undef, label %while.cond1, label %while.cond + +while.cond1: ; preds = %while.cond1, %while.cond + %c.1 = phi i16* [ %incdec.ptr5, %while.cond1 ], [ %c.0, 
%while.cond ] + %incdec.ptr5 = getelementptr inbounds i16* %c.1, i64 1 + br i1 undef, label %while.cond7, label %while.cond1 + +while.cond7: ; preds = %while.cond7, %while.cond1 + %0 = phi i16* [ %incdec.ptr10, %while.cond7 ], [ %c.1, %while.cond1 ] + %incdec.ptr10 = getelementptr inbounds i16* %0, i64 1 + br i1 undef, label %while.cond12.preheader, label %while.cond7 + +while.cond12.preheader: ; preds = %while.cond7 + br i1 undef, label %while.end16, label %while.body13.lr.ph + +while.body13: ; preds = %if.else, %while.body13.lr.ph + %1 = phi i16* [ %2, %while.body13.lr.ph ], [ %incdec.ptr15, %if.else ] + br i1 undef, label %while.cond12.outer.loopexit, label %if.else + +while.cond12.outer.loopexit: ; preds = %while.body13 + br i1 undef, label %while.end16, label %while.body13.lr.ph + +while.body13.lr.ph: ; preds = %while.cond12.outer.loopexit, %while.cond12.preheader + %2 = phi i16* [ %1, %while.cond12.outer.loopexit ], [ undef, %while.cond12.preheader ] + br label %while.body13 + +if.else: ; preds = %while.body13 + %incdec.ptr15 = getelementptr inbounds i16* %1, i64 1 + %cmp = icmp eq i16* %incdec.ptr15, %0 + br i1 %cmp, label %while.end16, label %while.body13 + +while.end16: ; preds = %if.else, %while.cond12.outer.loopexit, %while.cond12.preheader + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/ivchain.ll b/test/Transforms/LoopStrengthReduce/ivchain.ll index ce7ad198de49f..233800b71c64e 100644 --- a/test/Transforms/LoopStrengthReduce/ivchain.ll +++ b/test/Transforms/LoopStrengthReduce/ivchain.ll @@ -6,7 +6,7 @@ %struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 } -; CHECK: @test +; CHECK-LABEL: @test( ; CHECK: for.body: ; CHECK: lsr.iv = phi %struct ; CHECK: br diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg deleted file mode 100644 index 19eebc0ac7ac3..0000000000000 --- a/test/Transforms/LoopStrengthReduce/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ 
-config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll new file mode 100644 index 0000000000000..255cf41a81746 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll @@ -0,0 +1,42 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; PR15470: LSR miscompile. The test2 function should return '1'. +; +; SCEV expander cannot expand quadratic recurrences outside of the +; loop. This recurrence depends on %sub.us, so can't be expanded. +; +; CHECK-LABEL: @test2 +; CHECK-LABEL: test2.loop: +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] +; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 +; +; CHECK-LABEL: for.end: +; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us +; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us +; CHECK: %f = ashr i32 %sext.us, 24 +; CHECK: ret i32 %f +define i32 @test2() { +entry: + br label %test2.loop + +test2.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test2.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %sub.us = select i1 %tobool.us, i32 0, i32 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll index 011824116b3a0..65aa61fb937e3 100644 --- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll +++ 
b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll @@ -1,18 +1,50 @@ -; RUN: opt < %s -analyze -iv-users | grep "{1,+,3,+,2}<%loop> (post-inc with loop %loop)" +; RUN: opt < %s -analyze -iv-users | FileCheck %s ; The value of %r is dependent on a polynomial iteration expression. - +; +; CHECK-LABEL: IV Users for loop %foo.loop +; CHECK: {1,+,3,+,2}<%foo.loop> define i64 @foo(i64 %n) { entry: - br label %loop + br label %foo.loop -loop: - %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] +foo.loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %foo.loop ] %indvar.next = add i64 %indvar, 1 %c = icmp eq i64 %indvar.next, %n - br i1 %c, label %exit, label %loop + br i1 %c, label %exit, label %foo.loop exit: %r = mul i64 %indvar.next, %indvar.next ret i64 %r } + +; PR15470: LSR miscompile. The test2 function should return '1'. +; +; SCEV does not know how to denormalize chained recurrences, so make +; sure they aren't marked as post-inc users. +; +; CHECK-LABEL: IV Users for loop %test2.loop +; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24 +define i32 @test2() { +entry: + br label %test2.loop + +test2.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test2.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %sub.us = select i1 %tobool.us, i32 0, i32 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} diff --git a/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll b/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll new file mode 100644 index 0000000000000..a652a7661e23e --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll @@ -0,0 
+1,68 @@ +; RUN: opt -loop-reduce %s -S -o - | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32" +target triple = "i686-pc-win32" + +; <rdar://problem/14199725> Assertion failed: (CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!") +; CHECK-LABEL: @scalingFactorCrash( +define void @scalingFactorCrash() { + br i1 undef, label %1, label %24 + +; <label>:1 ; preds = %0 + br i1 undef, label %2, label %24 + +; <label>:2 ; preds = %1 + br i1 undef, label %3, label %24 + +; <label>:3 ; preds = %2 + br i1 undef, label %4, label %24 + +; <label>:4 ; preds = %3 + br i1 undef, label %24, label %6 + +; <label>:5 ; preds = %6 + br i1 undef, label %24, label %7 + +; <label>:6 ; preds = %6, %4 + br i1 undef, label %6, label %5 + +; <label>:7 ; preds = %9, %5 + br label %8 + +; <label>:8 ; preds = %8, %7 + br i1 undef, label %9, label %8 + +; <label>:9 ; preds = %8 + br i1 undef, label %7, label %10 + +; <label>:10 ; preds = %9 + br i1 undef, label %24, label %11 + +; <label>:11 ; preds = %10 + br i1 undef, label %15, label %13 + +; <label>:12 ; preds = %14 + br label %15 + +; <label>:13 ; preds = %11 + br label %14 + +; <label>:14 ; preds = %14, %13 + br i1 undef, label %14, label %12 + +; <label>:15 ; preds = %12, %11 + br i1 undef, label %16, label %24 + +; <label>:16 ; preds = %16, %15 + %17 = phi i32 [ %21, %16 ], [ undef, %15 ] + %18 = sub i32 %17, 1623127498 + %19 = getelementptr inbounds i32* undef, i32 %18 + store i32 undef, i32* %19, align 4 + %20 = add i32 %17, 1623127499 + %21 = add i32 %20, -1623127498 + %22 = add i32 %21, -542963121 + %23 = icmp ult i32 %22, undef + br i1 undef, label %16, label %24 + +; <label>:24 ; preds = %16, %15, %10, %5, %4, %3, %2, %1, %0 + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll new file mode 
100644 index 0000000000000..2c65261f57f53 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK: define void @Z4 +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Hoist %t2 computation outside the loop. +; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. 
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll index 8af5cf1dfd726..4562d29a0a20b 100644 --- a/test/Transforms/LoopStrengthReduce/uglygep.ll +++ b/test/Transforms/LoopStrengthReduce/uglygep.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-reduce -S | not grep uglygep +; RUN: opt < %s -loop-reduce -S | FileCheck %s ; LSR shouldn't consider %t8 to be an interesting user of %t6, and it ; should be able to form pretty GEPs. @@ -6,6 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" define void @Z4() nounwind { +; CHECK-LABEL: define void @Z4( bb: br label %bb3 @@ -20,11 +21,26 @@ bb3: ; preds = %bb2, %bb %t4 = phi i64 [ %t, %bb2 ], [ 0, %bb ] ; <i64> [#uses=3] br label %bb1 +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i64 %t4, 0 +; Hoist %t2 computation outside the loop. 
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8* undef, i64 %t4 +; CHECK-NEXT: br label %bb14 bb10: ; preds = %bb9 %t7 = icmp eq i64 %t4, 0 ; <i1> [#uses=1] %t3 = add i64 %t4, 16 ; <i64> [#uses=1] br label %bb14 +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float** undef +; Fold %t3's add within the address. +; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float* %t6, i64 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float* [[SCEVGEP1]] to i8* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8* [[SCEVGEP2]], i64 %t4 +; CHECK-NEXT: store i8 undef, i8* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 bb14: ; preds = %bb14, %bb10 %t2 = getelementptr inbounds i8* undef, i64 %t4 ; <i8*> [#uses=1] store i8 undef, i8* %t2 @@ -36,9 +52,15 @@ bb14: ; preds = %bb14, %bb10 } define fastcc void @TransformLine() nounwind { +; CHECK-LABEL: @TransformLine( bb: br label %loop0 +; CHECK: loop0: +; Induction variable is initialized to -2. +; CHECK-NEXT: [[PHIIV:%[^ ]+]] = phi i32 [ [[IVNEXT:%[^ ]+]], %loop0 ], [ -2, %bb ] +; CHECK-NEXT: [[IVNEXT]] = add i32 [[PHIIV]], 1 +; CHECK-NEXT: br i1 false, label %loop0, label %bb0 loop0: ; preds = %loop0, %bb %i0 = phi i32 [ %i0.next, %loop0 ], [ 0, %bb ] ; <i32> [#uses=2] %i0.next = add i32 %i0, 1 ; <i32> [#uses=1] @@ -47,18 +69,52 @@ loop0: ; preds = %loop0, %bb bb0: ; preds = %loop0 br label %loop1 +; CHECK: loop1: +; CHECK-NEXT: %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ] +; IVNEXT covers the uses of %i0 and %t0. +; Therefore, %t0 has been removed. +; The critical edge has been split. +; CHECK-NEXT: br i1 false, label %bb2, label %[[LOOP1BB6:.+]] loop1: ; preds = %bb5, %bb0 %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ] ; <i32> [#uses=4] %t0 = add i32 %i0, %i1 ; <i32> [#uses=1] br i1 false, label %bb2, label %bb6 +; CHECK: bb2: +; Critical edge split. 
+; CHECK-NEXT: br i1 true, label %[[BB2BB6:[^,]+]], label %bb5 bb2: ; preds = %loop1 br i1 true, label %bb6, label %bb5 +; CHECK: bb5: +; CHECK-NEXT: %i1.next = add i32 %i1, 1 +; CHECK-NEXT: br i1 true, label %[[BB5BB6:[^,]+]], label %loop1 bb5: ; preds = %bb2 %i1.next = add i32 %i1, 1 ; <i32> [#uses=1] br i1 true, label %bb6, label %loop1 +; bb5 to bb6 split basic block. +; CHECK: [[BB5BB6]]: +; CHECK-NEXT: [[INITIALVAL:%[^ ]+]] = add i32 [[IVNEXT]], %i1.next +; CHECK-NEXT: br label %[[SPLITTOBB6:.+]] + +; bb2 to bb6 split basic block. +; CHECK: [[BB2BB6]]: +; CHECK-NEXT: br label %[[SPLITTOBB6]] + +; Split basic blocks to bb6. +; CHECK: [[SPLITTOBB6]]: +; CHECK-NEXT: [[INITP8:%[^ ]+]] = phi i32 [ [[INITIALVAL]], %[[BB5BB6]] ], [ undef, %[[BB2BB6]] ] +; CHECK-NEXT: [[INITP9:%[^ ]+]] = phi i32 [ undef, %[[BB5BB6]] ], [ %i1, %[[BB2BB6]] ] +; CHECK-NEXT: br label %bb6 + +; CHECK: [[LOOP1BB6]]: +; CHECK-NEXT: br label %bb6 + +; CHECK: bb6: +; CHECK-NEXT: %p8 = phi i32 [ undef, %[[LOOP1BB6]] ], [ [[INITP8]], %[[SPLITTOBB6]] ] +; CHECK-NEXT: %p9 = phi i32 [ %i1, %[[LOOP1BB6]] ], [ [[INITP9]], %[[SPLITTOBB6]] ] +; CHECK-NEXT: unreachable bb6: ; preds = %bb5, %bb2, %loop1 %p8 = phi i32 [ %t0, %bb5 ], [ undef, %loop1 ], [ undef, %bb2 ] ; <i32> [#uses=0] %p9 = phi i32 [ undef, %bb5 ], [ %i1, %loop1 ], [ %i1, %bb2 ] ; <i32> [#uses=0] |