author    | Dimitry Andric <dim@FreeBSD.org> | 2017-06-10 13:44:06 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-06-10 13:44:06 +0000
commit    | 7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
tree      | cc41c05b1db454e3d802f34df75e636ee922ad87 /test/Transforms/LoopStrengthReduce
parent    | d288ef4c1788d3a951a7558c68312c2d320612b1 (diff)
Diffstat (limited to 'test/Transforms/LoopStrengthReduce')
6 files changed, 27 insertions, 23 deletions
diff --git a/test/Transforms/LoopStrengthReduce/X86/canonical.ll b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
index 2dafbb408aad4..6b6acb8687454 100644
--- a/test/Transforms/LoopStrengthReduce/X86/canonical.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
 ; Check LSR formula canonicalization will put loop invariant regs before
 ; induction variable of current loop, so exprs involving loop invariant regs
 ; can be promoted outside of current loop.
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index fb63b66137f37..7c01432914fff 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -163,7 +163,7 @@ for.end: ; preds = %for.body, %entry
 ; X64: movzbl -3(
 ;
 ; X32: foldedidx:
-; X32: movzbl -3(
+; X32: movzbl 400(
 define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
 entry:
   br label %for.body
@@ -275,7 +275,7 @@ exit:
 ;
 ; X32: @testCmpZero
 ; X32: %for.body82.us
-; X32: dec
+; X32: cmp
 ; X32: jne
 define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
index a7731bfcec565..deca954fea789 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 
 ; Strength reduction analysis here relies on IV Users analysis, that
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx"
 ; CHECK-LABEL: @test2
 ; CHECK-LABEL: test2.loop:
 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1
 ; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
 ;
 ; CHECK-LABEL: for.end:
-; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
 ; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
-; CHECK: %1 = sub i32 0, %sub.us
-; CHECK: %2 = add i32 %1, %lsr.iv.next
-; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK: %0 = sub i32 0, %sub.us
+; CHECK: %1 = sub i32 %0, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
 ; CHECK: %f = ashr i32 %sext.us, 24
 ; CHECK: ret i32 %f
 define i32 @test2() {
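Of the three files above, canonical.ll keeps its old register-count expectations by passing -lsr-insns-cost=false explicitly, and lsr-expand-quadratic.ll now gates on the standard x86-registered-target lit feature, so it is skipped in builds without the X86 backend. To re-run just this test directory from an LLVM build tree, something like the following should work (a sketch; the build and source paths are assumptions about the local layout):

    # run only the LoopStrengthReduce tests; <llvm-src> is the LLVM source checkout,
    # executed from the build directory containing bin/llvm-lit
    ./bin/llvm-lit -v <llvm-src>/test/Transforms/LoopStrengthReduce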
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
index 4888536bdf819..7f163500a737f 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
 ; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
 
 ; OPT test checks that LSR optimize compare for static counter to compare with 0.
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
index 3273cb4e6b5bc..239cc02335067 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
 ; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
 
 ; OPT checks that LSR prefers less instructions to less registers.
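The swapped RUN lines in lsr-insns-1.ll and lsr-insns-2.ll suggest the instruction-count LSR cost model is now the default in opt, with -lsr-insns-cost=false opting back into the older register-count heuristic. A minimal sketch of comparing the two modes by hand (the <build>/bin and <llvm-src> paths are assumptions about the local build and checkout):

    # LSR with the new default instruction-count cost model
    <build>/bin/opt -loop-reduce -mtriple=x86_64 -S \
        <llvm-src>/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll -o lsr-insn.ll
    # LSR with the instruction-count model disabled (old register-count behaviour)
    <build>/bin/opt -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S \
        <llvm-src>/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll -o lsr-regs.ll
    diff lsr-insn.ll lsr-regs.ll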
diff --git a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
index b563eb3ad9940..e05d5aa3027be 100644
--- a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; Check when we use an outerloop induction variable inside of an innerloop
 ; induction value expr, LSR can still choose to use single induction variable
@@ -22,18 +23,21 @@ for.body: ; preds = %for.inc, %entry
 for.body2.preheader: ; preds = %for.body
   br label %for.body2
 
-; Check LSR only generates one induction variable for for.body2 and the induction
-; variable will be shared by multiple array accesses.
+; Check LSR only generates two induction variables for for.body2 one for compare and
+; one to shared by multiple array accesses.
 ; CHECK: for.body2:
-; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ 0, %for.body2.preheader ]
+; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
+; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
 ; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
-; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* %maxarray, i64 [[LSR]]
-; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* [[SCEVGEP1]], i64 1
+; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
+; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
+; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
+; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
 ; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
-; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP3]], align 1
-; CHECK: [[SCEVGEP4:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
-; CHECK: store i8 {{.*}}, i8* [[SCEVGEP4]], align 1
+; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
+; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
+; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
+; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
 ; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
 
 for.body2: ; preds = %for.body2.preheader, %for.body2
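The updated nested-loop.ll checks expect two induction variables in the inner loop: an i8* pointer IV that drives the array accesses and a separate i64 count-down IV that only feeds the exit compare. Roughly, those CHECK lines describe an inner loop of the following shape (a hand-written sketch, not the test's verbatim output; names such as %ptr.iv and %count.iv are illustrative, and %maxarray and %0 are defined in the surrounding function):

    for.body2:
      %ptr.iv = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
      %count.iv = phi i64 [ %count.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
      %scevgep = getelementptr i8, i8* %ptr.iv, i64 1   ; pointer IV advances one byte per iteration
      %v = load i8, i8* %scevgep, align 1
      ; ... the remaining loads/stores are addressed off the pointer IV (via ptrtoint),
      ;     not off a second i64 induction variable ...
      %count.iv.next = add i64 %count.iv, -1            ; count-down IV only feeds the exit compare
      %exitcond = icmp ne i64 %count.iv.next, 0
      br i1 %exitcond, label %for.body2, label %for.inc.loopexit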