Diffstat (limited to 'test/Transforms/LoopUnroll')
-rw-r--r--   test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll               |   3
-rw-r--r--   test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll               |  99
-rw-r--r--   test/Transforms/LoopUnroll/ephemeral.ll                          |  44
-rw-r--r--   test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll   | 133
-rw-r--r--   test/Transforms/LoopUnroll/nsw-tripcount.ll                      |  32
-rw-r--r--   test/Transforms/LoopUnroll/partial-unroll-optsize.ll             |  19
-rw-r--r--   test/Transforms/LoopUnroll/runtime-loop.ll                       |  27
-rw-r--r--   test/Transforms/LoopUnroll/runtime-loop1.ll                      |   8
-rw-r--r--   test/Transforms/LoopUnroll/runtime-loop2.ll                      |   5
-rw-r--r--   test/Transforms/LoopUnroll/scevunroll.ll                         |  15
-rw-r--r--   test/Transforms/LoopUnroll/tripcount-overflow.ll                 |  30
-rw-r--r--   test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll            | 124
-rw-r--r--   test/Transforms/LoopUnroll/unroll-pragmas.ll                     |  60
-rw-r--r--   test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll       |  35
14 files changed, 537 insertions(+), 97 deletions(-)
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll index 17c91e5c07b1..aae79cbac789 100644 --- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -41,8 +41,7 @@ for.end: ; preds = %for.body, %entry } ; CHECK-LABEL: @test -; CHECK: unr.cmp{{.*}}: -; CHECK: for.body.unr{{.*}}: +; CHECK: for.body.prol{{.*}}: ; CHECK: for.body: ; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll new file mode 100644 index 000000000000..7a50fc0a4f49 --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s +define void @unroll_opt_for_size() nounwind optsize { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_opt_for_size +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + +define void @unroll_default() nounwind { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_default +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll new file mode 100644 index 000000000000..9d4061390537 --- /dev/null +++ b/test/Transforms/LoopUnroll/ephemeral.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s + +; Make sure this loop is completely unrolled... 
+; CHECK-LABEL: @test1 +; CHECK: for.body: +; CHECK-NOT: for.end: + +define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + + ; This loop will be completely unrolled, even with these extra instructions, + ; but only because they're ephemeral (and, thus, free). + %1 = add nsw i32 %0, 2 + %2 = add nsw i32 %1, 4 + %3 = add nsw i32 %2, 4 + %4 = add nsw i32 %3, 4 + %5 = add nsw i32 %4, 4 + %6 = add nsw i32 %5, 4 + %7 = add nsw i32 %6, 4 + %8 = add nsw i32 %7, 4 + %9 = add nsw i32 %8, 4 + %10 = add nsw i32 %9, 4 + %ca = icmp sgt i32 %10, -7 + call void @llvm.assume(i1 %ca) + + %add = add nsw i32 %0, %sum.01 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 5 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 %add +} + +declare void @llvm.assume(i1) nounwind + diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll new file mode 100644 index 000000000000..dcb5d1c28fb2 --- /dev/null +++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll @@ -0,0 +1,133 @@ +; REQUIRES: asserts +; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS +; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise) +; Test that llvm.annotation intrinsic do not count against the loop body size +; and prevent unrolling. +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@B = common global i32 0, align 4 + +define void @foo(i32* noalias %A, i32 %B, i32 %C) { +entry: + br label %for.body + +; A loop that has a small loop body (except for the annotations) that should be +; unrolled with the default heuristic. Make sure the extra annotations do not +; prevent unrolling +for.body: ; preds = %entry, %for.body + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + ; The real loop. 
+ %mul = mul nsw i32 %B, %C + %arrayidx = getelementptr inbounds i32* %A, i32 %i.01 + store i32 %mul, i32* %arrayidx, align 4 + %inc = add nsw i32 %i.01, 1 + %exitcond = icmp ne i32 %inc, 4 + + ; A bunch of annotations + %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.38 = tail call i32 
@llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.59 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.79 = 
tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll new file mode 100644 index 000000000000..98cab32a42a6 --- /dev/null +++ b/test/Transforms/LoopUnroll/nsw-tripcount.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-unroll -S %s | FileCheck %s + +; extern void f(int); +; void test1(int v) { +; for (int i=v; i<=v+1; ++i) +; f(i); +; } +; +; We can use the nsw information to see that the tripcount will be 2, so the +; loop should be unrolled as this is always beneficial + +declare void @f(i32) + +; CHECK-LABEL: @test1 +define void @test1(i32 %v) { +entry: + %add = add nsw i32 %v, 1 + br label %for.body + +for.body: + %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ] + tail call void @f(i32 %i.04) + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %i.04, %add + br i1 %cmp, label %for.body, label %for.end + +; CHECK: call void @f +; CHECK-NOT: br i1 +; CHECK: call void @f +for.end: + ret void +} diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll index 3179d55e978a..a650317f3df7 100644 --- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll +++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s ; Loop size = 3, when the function has the optsize attribute, the ; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled -; by 16 times because 3 * 16 < 50. +; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not). 
define void @unroll_opt_for_size() nounwind optsize { entry: br label %loop @@ -32,4 +32,21 @@ exit: ; CHECK-NEXT: add ; CHECK-NEXT: add ; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add ; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index a14087dcdce7..3a8777bb1473 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -3,15 +3,16 @@ ; Tests for unrolling loops with run-time trip counts ; CHECK: %xtraiter = and i32 %n -; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 -; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod -; CHECK: br i1 %lcmp.or, label %unr.cmp +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 +; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod +; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split -; CHECK: unr.cmp{{.*}}: -; CHECK: for.body.unr{{.*}}: -; CHECK: for.body: -; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body +; CHECK: for.body.prol: +; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ] +; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1 +; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { entry: @@ -39,7 +40,7 @@ for.end: ; preds = %for.body, %entry ; even if the -unroll-runtime is specified ; CHECK: for.body: -; CHECK-NOT: for.body.unr: +; CHECK-NOT: for.body.prol: define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { entry: @@ -85,8 +86,8 @@ cond_true138: ; Test run-time unrolling for a loop that counts down by -2. 
-; CHECK: for.body.unr: -; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body +; CHECK: for.body.prol: +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly { entry: @@ -113,3 +114,7 @@ for.end: ; preds = %for.cond.for.end_cr %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] ret i16 %res.0.lcssa } + +; CHECK: !0 = distinct !{!0, !1} +; CHECK: !1 = !{!"llvm.loop.unroll.disable"} + diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index ad99b8cd9c66..38b4f32354a3 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,11 +1,11 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s ; This tests that setting the unroll count works -; CHECK: unr.cmp: -; CHECK: for.body.unr: +; CHECK: for.body.prol: +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split ; CHECK: for.body: -; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body +; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll index cbc7af58ff5b..176362a34456 100644 --- a/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,10 +1,9 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts -; CHECK: unr.cmp: -; CHECK: for.body.unr: +; CHECK: for.body.prol: ; CHECK: for.body: ; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll index c3086e8335f9..20161d798e9e 100644 --- a/test/Transforms/LoopUnroll/scevunroll.ll +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -66,16 +66,13 @@ exit2: ; SCEV properly unrolls multi-exit loops. ; -; SCEV cannot currently unroll this loop. -; It should ideally detect a trip count of 5. -; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops. 
; CHECK-LABEL: @multiExit( -; CHECKFIXME: getelementptr i32* %base, i32 10 -; CHECKFIXME-NEXT: load i32* -; CHECKFIXME: br i1 false, label %l2.10, label %exit1 -; CHECKFIXME: l2.10: -; CHECKFIXME-NOT: br -; CHECKFIXME: ret i32 +; CHECK: getelementptr i32* %base, i32 10 +; CHECK-NEXT: load i32* +; CHECK: br i1 false, label %l2.10, label %exit1 +; CHECK: l2.10: +; CHECK-NOT: br +; CHECK: ret i32 define i32 @multiExit(i32* %base) nounwind { entry: br label %l1 diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll new file mode 100644 index 000000000000..d59368578ec2 --- /dev/null +++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; When prologue is fully unrolled, the branch on its end is unconditional. +; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow, +; like in this example, where it comes from an argument. +; +; This test is based on an example from here: +; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out +; +; CHECK: while.body.prol: +; CHECK: br i1 +; CHECK: entry.split: + +; Function Attrs: nounwind readnone ssp uwtable +define i32 @foo(i32 %N) #0 { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %i = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %cmp = icmp eq i32 %i, %N + %inc = add i32 %i, 1 + br i1 %cmp, label %while.end, label %while.body + +while.end: ; preds = %while.body + ret i32 %i +} + +attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll index 39da7fa70ade..4f934a688be8 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll @@ -1,19 +1,19 @@ ; RUN: opt < %s -loop-unroll -S | FileCheck %s ; -; Verify that the unrolling pass removes existing loop unrolling metadata +; Verify that the unrolling pass removes existing unroll count metadata ; and adds a disable unrolling node after unrolling is complete. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; #pragma clang loop vectorize(enable) unroll(enable) unroll_count(4) vectorize_width(8) +; #pragma clang loop vectorize(enable) unroll_count(4) vectorize_width(8) ; -; Unroll metadata should be replaces with unroll(disable). Vectorize +; Unroll count metadata should be replaced with unroll(disable). Vectorize ; metadata should be untouched. 
; -; CHECK-LABEL: @loop1( +; CHECK-LABEL: @unroll_count_4( ; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]] -define void @loop1(i32* nocapture %a) { +define void @unroll_count_4(i32* nocapture %a) { entry: br label %for.body @@ -30,19 +30,49 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !5} -!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} -!3 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true} -!4 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} -!5 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} +!1 = !{!1, !2, !3, !4} +!2 = !{!"llvm.loop.vectorize.enable", i1 true} +!3 = !{!"llvm.loop.unroll.count", i32 4} +!4 = !{!"llvm.loop.vectorize.width", i32 8} + +; #pragma clang loop unroll(full) +; +; An unroll disable metadata node is only added for the unroll count case. +; In this case, the loop has a full unroll metadata but can't be fully unrolled +; because the trip count is dynamic. The full unroll metadata should remain +; after unrolling. +; +; CHECK-LABEL: @unroll_full( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]] +define void @unroll_full(i32* nocapture %a, i32 %b) { +entry: + %cmp3 = icmp sgt i32 %b, 0 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %b + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 + +for.end: ; preds = %for.body, %entry + ret void +} +!5 = !{!5, !6} +!6 = !{!"llvm.loop.unroll.full"} ; #pragma clang loop unroll(disable) ; ; Unroll metadata should not change. ; -; CHECK-LABEL: @loop2( -; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]] -define void @loop2(i32* nocapture %a) { +; CHECK-LABEL: @unroll_disable( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]] +define void @unroll_disable(i32* nocapture %a) { entry: br label %for.body @@ -54,16 +84,66 @@ for.body: ; preds = %for.body, %entry store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 64 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 for.end: ; preds = %for.body ret void } -!6 = metadata !{metadata !6, metadata !7} -!7 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} - -; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]} -; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} -; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} -; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} -; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_DISABLE:.*]]} +!7 = !{!7, !8} +!8 = !{!"llvm.loop.unroll.disable"} + +; This function contains two loops which share the same llvm.loop metadata node +; with an llvm.loop.unroll.count 2 hint. 
Both loops should be unrolled. This +; verifies that adding disable metadata to a loop after unrolling doesn't affect +; other loops which previously shared the same llvm.loop metadata. +; +; CHECK-LABEL: @shared_metadata( +; CHECK: store i32 +; CHECK: store i32 +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]] +; CHECK: store i32 +; CHECK: store i32 +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]] +define void @shared_metadata(i32* nocapture %List) #0 { +entry: + br label %for.body3 + +for.body3: ; preds = %for.body3, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %0, 10 + store i32 %add4, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 4 + br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9 + +for.body3.1.preheader: ; preds = %for.body3 + br label %for.body3.1 + +for.body3.1: ; preds = %for.body3.1.preheader, %for.body3.1 + %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ] + %1 = add nsw i64 %indvars.iv.1, 1 + %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1 + %2 = load i32* %arrayidx.1, align 4 + %add4.1 = add nsw i32 %2, 10 + store i32 %add4.1, i32* %arrayidx.1, align 4 + %exitcond.1 = icmp eq i64 %1, 4 + br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9 + +for.inc5.1: ; preds = %for.body3.1 + ret void +} +!9 = !{!9, !10} +!10 = !{!"llvm.loop.unroll.count", i32 2} + + +; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8} +; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"} +; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]} +; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} +; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]} diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index e1b24e44b5ab..d53fa49cc9f5 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -54,12 +54,12 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} +!1 = !{!1, !2} +!2 = !{!"llvm.loop.unroll.disable"} ; loop64 has a high enough count that it should *not* be unrolled by ; the default unrolling heuristic. It serves as the control for the -; unroll(enable) pragma test loop64_with_.* tests below. +; unroll(full) pragma test loop64_with_.* tests below. ; ; CHECK-LABEL: @loop64( ; CHECK: store i32 @@ -83,7 +83,7 @@ for.end: ; preds = %for.body ret void } -; #pragma clang loop unroll(enable) +; #pragma clang loop unroll(full) ; Loop should be fully unrolled. 
; ; CHECK-LABEL: @loop64_with_enable( @@ -105,8 +105,8 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!3 = metadata !{metadata !3, metadata !4} -!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true} +!3 = !{!3, !4} +!4 = !{!"llvm.loop.unroll.full"} ; #pragma clang loop unroll_count(4) ; Loop should be unrolled 4 times. @@ -135,40 +135,10 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!5 = metadata !{metadata !5, metadata !6} -!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} +!5 = !{!5, !6} +!6 = !{!"llvm.loop.unroll.count", i32 4} - -; #pragma clang loop unroll_count(enable) unroll_count(4) -; Loop should be unrolled 4 times. -; -; CHECK-LABEL: @loop64_with_enable_and_count4( -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK-NOT: store i32 -; CHECK: br i1 -define void @loop64_with_enable_and_count4(i32* nocapture %a) { -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %arrayidx, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 64 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 - -for.end: ; preds = %for.body - ret void -} -!7 = metadata !{metadata !7, metadata !6, metadata !4} - -; #pragma clang loop unroll_count(enable) +; #pragma clang loop unroll(full) ; Full unrolling is requested, but loop has a dynamic trip count so ; no unrolling should occur. ; @@ -195,7 +165,7 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!8 = metadata !{metadata !8, metadata !4} +!8 = !{!8, !4} ; #pragma clang loop unroll_count(4) ; Loop has a dynamic trip count. Unrolling should occur, but no @@ -232,7 +202,7 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!9 = metadata !{metadata !9, metadata !6} +!9 = !{!9, !6} ; #pragma clang loop unroll_count(1) ; Loop should not be unrolled @@ -258,10 +228,10 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!10 = metadata !{metadata !10, metadata !11} -!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1} +!10 = !{!10, !11} +!11 = !{!"llvm.loop.unroll.count", i32 1} -; #pragma clang loop unroll(enable) +; #pragma clang loop unroll(full) ; Loop has very high loop count (1 million) and full unrolling was requested. ; Loop should unrolled up to the pragma threshold, but not completely. ; @@ -286,4 +256,4 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!12 = metadata !{metadata !12, metadata !4} +!12 = !{!12, !4} diff --git a/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll new file mode 100644 index 000000000000..adbf47defe8f --- /dev/null +++ b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll @@ -0,0 +1,35 @@ +; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s +; Crasher from PR20987. 
+ +; CHECK: define void @update_loop_info_in_subloops +; CHECK: entry: +; CHECK: L: +; CHECK: L.inner: +; CHECK: L.inner.latch: +; CHECK: L.latch: +; CHECK: L.inner.1: +; CHECK: L.inner.latch.1: +; CHECK: L.latch.1: + +define void @update_loop_info_in_subloops() { +entry: + br label %L + +L: + %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ] + br label %L.inner + +L.inner: + br label %L.inner.latch + +L.inner.latch: + br i1 false, label %L.latch, label %L.inner + +L.latch: + %1 = add i64 %0, 1 + %2 = icmp eq i64 %1, 3 + br i1 %2, label %exit, label %L + +exit: + ret void +} |