diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:08:18 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:08:18 +0000 |
| commit | 5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (patch) | |
| tree | f5944309621cee4fe0976be6f9ac619b7ebfc4c2 /test/Transforms/LoopStrengthReduce | |
| parent | 68bcb7db193e4bc81430063148253d30a791023e (diff) | |
Notes
Diffstat (limited to 'test/Transforms/LoopStrengthReduce')
14 files changed, 273 insertions, 10 deletions
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg new file mode 100644 index 000000000000..675f48e199a0 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg @@ -0,0 +1,4 @@ +config.suffixes = ['.ll'] + +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll new file mode 100644 index 000000000000..9a175ad8d355 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid < %s | FileCheck %s +; rdar://10232252 +; Prevent LSR of doing poor choice that cannot be folded in addressing mode + +; Remove the -pre-RA-sched=list-hybrid option after fixing: +; <rdar://problem/12702735> [ARM64][coalescer] need better register +; coalescing for simple unit tests. + +; CHECK: testCase +; CHECK: %while.body{{$}} +; CHECK: ldr [[STREG:x[0-9]+]], [{{x[0-9]+}}], #8 +; CHECK-NEXT: str [[STREG]], [{{x[0-9]+}}], #8 +; CHECK: %while.end +define i32 @testCase() nounwind ssp { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %len.06 = phi i64 [ 1288, %entry ], [ %sub, %while.body ] + %pDst.05 = phi i64* [ inttoptr (i64 6442450944 to i64*), %entry ], [ %incdec.ptr1, %while.body ] + %pSrc.04 = phi i64* [ inttoptr (i64 4294967296 to i64*), %entry ], [ %incdec.ptr, %while.body ] + %incdec.ptr = getelementptr inbounds i64* %pSrc.04, i64 1 + %tmp = load volatile i64* %pSrc.04, align 8 + %incdec.ptr1 = getelementptr inbounds i64* %pDst.05, i64 1 + store volatile i64 %tmp, i64* %pDst.05, align 8 + %sub = add i64 %len.06, -8 + %cmp = icmp sgt i64 %sub, -1 + br i1 %cmp, label %while.body, label %while.end + +while.end: ; preds = %while.body + tail call void inttoptr (i64 6442450944 to void ()*)() nounwind + ret i32 0 +} diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll new file mode 100644 index 000000000000..10b2c3a403cc --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll @@ -0,0 +1,101 @@ +; RUN: llc < %s -O3 -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s +; <rdar://problem/11635990> [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys +; +; LSR on loop %while.cond should reassociate non-address mode +; expressions at use %cmp16 to avoid sinking computation into %while.body18. +; +; Remove the -pre-RA-sched=list-hybrid option after fixing: +; <rdar://problem/12702735> [ARM64][coalescer] need better register +; coalescing for simple unit tests. + +; CHECK: @memset +; CHECK: %while.body18{{$}} +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8 +; First set the IVREG variable, then use it +; CHECK-NEXT: sub [[IVREG:x[0-9]+]], +; CHECK: [[IVREG]], #8 +; CHECK-NEXT: cmp [[IVREG]], #7 +; CHECK-NEXT: b.hi +define i8* @memset(i8* %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat { +entry: + %cmp = icmp eq i64 %len, 0 + br i1 %cmp, label %done, label %while.cond.preheader + +while.cond.preheader: ; preds = %entry + %conv = trunc i32 %val to i8 + br label %while.cond + +while.cond: ; preds = %while.body, %while.cond.preheader + %ptr.0 = phi i8* [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ] + %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ] + %cond = icmp eq i64 %len.addr.0, 0 + br i1 %cond, label %done, label %land.rhs + +land.rhs: ; preds = %while.cond + %0 = ptrtoint i8* %ptr.0 to i64 + %and = and i64 %0, 7 + %cmp5 = icmp eq i64 %and, 0 + br i1 %cmp5, label %if.end9, label %while.body + +while.body: ; preds = %land.rhs + %incdec.ptr = getelementptr inbounds i8* %ptr.0, i64 1 + store i8 %conv, i8* %ptr.0, align 1, !tbaa !0 + %dec = add i64 %len.addr.0, -1 + br label %while.cond + +if.end9: ; preds = %land.rhs + %conv.mask = and i32 %val, 255 + %1 = zext i32 %conv.mask to i64 + %2 = shl nuw nsw i64 %1, 8 + %ins18 = or i64 %2, %1 + %3 = shl nuw nsw i64 %1, 16 + %ins15 = or i64 %ins18, %3 + %4 = shl nuw nsw i64 %1, 24 + %5 = shl nuw nsw i64 %1, 32 + %mask8 = or i64 %ins15, %4 + %6 = shl nuw nsw i64 %1, 40 + %mask5 = or i64 %mask8, %5 + %7 = shl nuw nsw i64 %1, 48 + %8 = shl nuw i64 %1, 56 + %mask2.masked = or i64 %mask5, %6 + %mask = or i64 %mask2.masked, %7 + %ins = or i64 %mask, %8 + %9 = bitcast i8* %ptr.0 to i64* + %cmp1636 = icmp ugt i64 %len.addr.0, 7 + br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph + +while.body18: ; preds = %if.end9, %while.body18 + %wideptr.038 = phi i64* [ %incdec.ptr19, %while.body18 ], [ %9, %if.end9 ] + %len.addr.137 = phi i64 [ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ] + %incdec.ptr19 = getelementptr inbounds i64* %wideptr.038, i64 1 + store i64 %ins, i64* %wideptr.038, align 8, !tbaa !2 + %sub = add i64 %len.addr.137, -8 + %cmp16 = icmp ugt i64 %sub, 7 + br i1 %cmp16, label %while.body18, label %while.end20 + +while.end20: ; preds = %while.body18 + %cmp21 = icmp eq i64 %sub, 0 + br i1 %cmp21, label %done, label %while.body29.lr.ph + +while.body29.lr.ph: ; preds = %while.end20, %if.end9 + %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ] + %wideptr.0.lcssa48 = phi i64* [ %incdec.ptr19, %while.end20 ], [ %9, %if.end9 ] + %10 = bitcast i64* %wideptr.0.lcssa48 to i8* + br label %while.body29 + +while.body29: ; preds = %while.body29, %while.body29.lr.ph + %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ] + %ptr.134 = phi i8* [ %10, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ] + %dec26 = add i64 %len.addr.235, -1 + %incdec.ptr31 = getelementptr inbounds i8* %ptr.134, i64 1 + store i8 %conv, i8* %ptr.134, align 1, !tbaa !0 + %cmp27 = icmp eq i64 %dec26, 0 + br i1 %cmp27, label %done, label %while.body29 + +done: ; preds = %while.cond, %while.body29, %while.end20, %entry + ret i8* %dest +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +!2 = metadata !{metadata !"long long", metadata !0} diff --git a/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll b/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll new file mode 100644 index 000000000000..217896e55c66 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll @@ -0,0 +1,70 @@ +; RUN: llc -mcpu=cyclone -debug-only=loop-reduce < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; LSR used to fail here due to a bug in the ReqRegs test. +; CHECK: The chosen solution requires +; CHECK-NOT: No Satisfactory Solution + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios" + +define void @do_integer_add(i64 %iterations, i8* nocapture readonly %cookie) { +entry: + %N = bitcast i8* %cookie to i32* + %0 = load i32* %N, align 4 + %add = add nsw i32 %0, 57 + %cmp56 = icmp eq i64 %iterations, 0 + br i1 %cmp56, label %while.end, label %for.cond.preheader.preheader + +for.cond.preheader.preheader: ; preds = %entry + br label %for.cond.preheader + +while.cond.loopexit: ; preds = %for.body + %add21.lcssa = phi i32 [ %add21, %for.body ] + %dec58 = add i64 %dec58.in, -1 + %cmp = icmp eq i64 %dec58, 0 + br i1 %cmp, label %while.end.loopexit, label %for.cond.preheader + +for.cond.preheader: ; preds = %for.cond.preheader.preheader, %while.cond.loopexit + %dec58.in = phi i64 [ %dec58, %while.cond.loopexit ], [ %iterations, %for.cond.preheader.preheader ] + %a.057 = phi i32 [ %add21.lcssa, %while.cond.loopexit ], [ %add, %for.cond.preheader.preheader ] + br label %for.body + +for.body: ; preds = %for.body, %for.cond.preheader + %a.154 = phi i32 [ %a.057, %for.cond.preheader ], [ %add21, %for.body ] + %i.053 = phi i32 [ 1, %for.cond.preheader ], [ %inc, %for.body ] + %inc = add nsw i32 %i.053, 1 + %add2 = shl i32 %a.154, 1 + %add3 = add nsw i32 %add2, %i.053 + %add4 = shl i32 %add3, 1 + %add5 = add nsw i32 %add4, %i.053 + %add6 = shl i32 %add5, 1 + %add7 = add nsw i32 %add6, %i.053 + %add8 = shl i32 %add7, 1 + %add9 = add nsw i32 %add8, %i.053 + %add10 = shl i32 %add9, 1 + %add11 = add nsw i32 %add10, %i.053 + %add12 = shl i32 %add11, 1 + %add13 = add nsw i32 %add12, %i.053 + %add14 = shl i32 %add13, 1 + %add15 = add nsw i32 %add14, %i.053 + %add16 = shl i32 %add15, 1 + %add17 = add nsw i32 %add16, %i.053 + %add18 = shl i32 %add17, 1 + %add19 = add nsw i32 %add18, %i.053 + %add20 = shl i32 %add19, 1 + %add21 = add nsw i32 %add20, %i.053 + %exitcond = icmp eq i32 %inc, 1001 + br i1 %exitcond, label %while.cond.loopexit, label %for.body + +while.end.loopexit: ; preds = %while.cond.loopexit + %add21.lcssa.lcssa = phi i32 [ %add21.lcssa, %while.cond.loopexit ] + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %a.0.lcssa = phi i32 [ %add, %entry ], [ %add21.lcssa.lcssa, %while.end.loopexit ] + tail call void @use_int(i32 %a.0.lcssa) + ret void +} + +declare void @use_int(i32) diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll index 5d728b528ea5..1d56ddea2446 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll +++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a8 | FileCheck %s +; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - -arm-atomic-cfg-tidy=0 | FileCheck %s ; ; LSR should only check for valid address modes when the IV user is a ; memory address. diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll index ab7f20f0129b..f4edf092641f 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll +++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 +; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9 +; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 -addr-sink-using-gep=1 %s -o - | FileCheck %s -check-prefix=A9 ; @simple is the most basic chain of address induction variables. Chaining ; saves at least one register and avoids complex addressing and setup diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg index 8a3ba96497e7..98c6700c209d 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg @@ -1,4 +1,3 @@ -targets = set(config.root.targets_to_build.split()) -if not 'ARM' in targets: +if not 'ARM' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll index e42b67fd35af..937791dca413 100644 --- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64 ; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X32 ; @simple is the most basic chain of address induction variables. Chaining ; saves at least one register and avoids complex addressing and setup diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg index ba763cf03ffc..e71f3cc4c41e 100644 --- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg @@ -1,4 +1,3 @@ -targets = set(config.root.targets_to_build.split()) -if not 'X86' in targets: +if not 'X86' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll new file mode 100644 index 000000000000..55069947240d --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll @@ -0,0 +1,50 @@ +; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @indvar_expansion(i8* nocapture readonly %rowsptr) { +entry: + br label %for.cond + +; SCEVExpander used to create induction variables in the loop %for.cond while +; expanding the recurrence start value of loop strength reduced values from +; %vector.body. + +; CHECK-LABEL: indvar_expansion +; CHECK: for.cond: +; CHECK-NOT: phi i3 +; CHECK: br i1 {{.+}}, label %for.cond + +for.cond: + %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ] + %cmp = icmp eq i8 undef, 0 + %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1 + br i1 %cmp, label %for.cond, label %for.cond2 + +for.cond2: + br i1 undef, label %for.cond2, label %for.body14.lr.ph + +for.body14.lr.ph: + %sext = shl i64 %indvars.iv44, 32 + %0 = ashr exact i64 %sext, 32 + %1 = sub i64 undef, %indvars.iv44 + %2 = and i64 %1, 4294967295 + %3 = add i64 %2, 1 + %fold = add i64 %1, 1 + %n.mod.vf = and i64 %fold, 7 + %n.vec = sub i64 %3, %n.mod.vf + %end.idx.rnd.down = add i64 %n.vec, %0 + br label %vector.body + +vector.body: + %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ] + %4 = getelementptr inbounds i8* %rowsptr, i64 %index + %5 = bitcast i8* %4 to <4 x i8>* + %wide.load = load <4 x i8>* %5, align 1 + %index.next = add i64 %index, 8 + %6 = icmp eq i64 %index.next, %end.idx.rnd.down + br i1 %6, label %for.end24, label %vector.body + +for.end24: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll index 4204abc7ca22..e7ebaa8ea61b 100644 --- a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll +++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-reduce -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" ; LSR shouldn't normalize IV if it can't be denormalized to the original diff --git a/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll b/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll index 90051e3542c2..16bb508d2e27 100644 --- a/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll +++ b/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll @@ -1,5 +1,9 @@ ; Check that this test makes INDVAR and related stuff dead. -; RUN: opt < %s -loop-reduce -S | grep phi | count 2 +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; CHECK: phi +; CHECK: phi +; CHECK-NOT: phi declare i1 @pred() diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll index 255cf41a8174..aa688d999e60 100644 --- a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll +++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll @@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx" ; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] ; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 ; -; CHECK=LABEL: for.end: +; CHECK-LABEL: for.end: ; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us ; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us ; CHECK: %f = ashr i32 %sext.us, 24 diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll index 89adef7bd49d..c38d6a625e88 100644 --- a/test/Transforms/LoopStrengthReduce/pr18165.ll +++ b/test/Transforms/LoopStrengthReduce/pr18165.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-reduce -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" ; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type. |
