Diffstat (limited to 'test/Transforms/LoopUnroll')
-rw-r--r--   test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll               |   3
-rw-r--r--   test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll               |  99
-rw-r--r--   test/Transforms/LoopUnroll/ephemeral.ll                          |  44
-rw-r--r--   test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll   | 133
-rw-r--r--   test/Transforms/LoopUnroll/nsw-tripcount.ll                      |  32
-rw-r--r--   test/Transforms/LoopUnroll/partial-unroll-optsize.ll             |  19
-rw-r--r--   test/Transforms/LoopUnroll/runtime-loop.ll                       |  27
-rw-r--r--   test/Transforms/LoopUnroll/runtime-loop1.ll                      |   8
-rw-r--r--   test/Transforms/LoopUnroll/runtime-loop2.ll                      |   5
-rw-r--r--   test/Transforms/LoopUnroll/scevunroll.ll                         |  15
-rw-r--r--   test/Transforms/LoopUnroll/tripcount-overflow.ll                 |  30
-rw-r--r--   test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll            | 124
-rw-r--r--   test/Transforms/LoopUnroll/unroll-pragmas.ll                     |  60
-rw-r--r--   test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll       |  35
14 files changed, 537 insertions(+), 97 deletions(-)
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll index 17c91e5c07b1..aae79cbac789 100644 --- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -41,8 +41,7 @@ for.end: ; preds = %for.body, %entry } ; CHECK-LABEL: @test -; CHECK: unr.cmp{{.*}}: -; CHECK: for.body.unr{{.*}}: +; CHECK: for.body.prol{{.*}}: ; CHECK: for.body: ; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll new file mode 100644 index 000000000000..7a50fc0a4f49 --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s +define void @unroll_opt_for_size() nounwind optsize { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_opt_for_size +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + +define void @unroll_default() nounwind { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_default +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll new file mode 100644 index 000000000000..9d4061390537 --- /dev/null +++ b/test/Transforms/LoopUnroll/ephemeral.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s + +; Make sure this loop is completely unrolled... 
+; CHECK-LABEL: @test1 +; CHECK: for.body: +; CHECK-NOT: for.end: + +define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + + ; This loop will be completely unrolled, even with these extra instructions, + ; but only because they're ephemeral (and, thus, free). + %1 = add nsw i32 %0, 2 + %2 = add nsw i32 %1, 4 + %3 = add nsw i32 %2, 4 + %4 = add nsw i32 %3, 4 + %5 = add nsw i32 %4, 4 + %6 = add nsw i32 %5, 4 + %7 = add nsw i32 %6, 4 + %8 = add nsw i32 %7, 4 + %9 = add nsw i32 %8, 4 + %10 = add nsw i32 %9, 4 + %ca = icmp sgt i32 %10, -7 + call void @llvm.assume(i1 %ca) + + %add = add nsw i32 %0, %sum.01 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 5 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 %add +} + +declare void @llvm.assume(i1) nounwind + diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll new file mode 100644 index 000000000000..dcb5d1c28fb2 --- /dev/null +++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll @@ -0,0 +1,133 @@ +; REQUIRES: asserts +; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS +; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise) +; Test that llvm.annotation intrinsic do not count against the loop body size +; and prevent unrolling. +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@B = common global i32 0, align 4 + +define void @foo(i32* noalias %A, i32 %B, i32 %C) { +entry: + br label %for.body + +; A loop that has a small loop body (except for the annotations) that should be +; unrolled with the default heuristic. Make sure the extra annotations do not +; prevent unrolling +for.body: ; preds = %entry, %for.body + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + ; The real loop. 
+ %mul = mul nsw i32 %B, %C + %arrayidx = getelementptr inbounds i32* %A, i32 %i.01 + store i32 %mul, i32* %arrayidx, align 4 + %inc = add nsw i32 %i.01, 1 + %exitcond = icmp ne i32 %inc, 4 + + ; A bunch of annotations + %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.38 = tail call i32 
@llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.59 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.79 = 
tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll new file mode 100644 index 000000000000..98cab32a42a6 --- /dev/null +++ b/test/Transforms/LoopUnroll/nsw-tripcount.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-unroll -S %s | FileCheck %s + +; extern void f(int); +; void test1(int v) { +; for (int i=v; i<=v+1; ++i) +; f(i); +; } +; +; We can use the nsw information to see that the tripcount will be 2, so the +; loop should be unrolled as this is always beneficial + +declare void @f(i32) + +; CHECK-LABEL: @test1 +define void @test1(i32 %v) { +entry: + %add = add nsw i32 %v, 1 + br label %for.body + +for.body: + %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ] + tail call void @f(i32 %i.04) + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %i.04, %add + br i1 %cmp, label %for.body, label %for.end + +; CHECK: call void @f +; CHECK-NOT: br i1 +; CHECK: call void @f +for.end: + ret void +} diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll index 3179d55e978a..a650317f3df7 100644 --- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll +++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s ; Loop size = 3, when the function has the optsize attribute, the ; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled -; by 16 times because 3 * 16 < 50. +; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not). 
define void @unroll_opt_for_size() nounwind optsize { entry: br label %loop @@ -32,4 +32,21 @@ exit: ; CHECK-NEXT: add ; CHECK-NEXT: add ; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add ; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index a14087dcdce7..3a8777bb1473 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -3,15 +3,16 @@ ; Tests for unrolling loops with run-time trip counts ; CHECK: %xtraiter = and i32 %n -; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 -; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod -; CHECK: br i1 %lcmp.or, label %unr.cmp +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 +; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod +; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split -; CHECK: unr.cmp{{.*}}: -; CHECK: for.body.unr{{.*}}: -; CHECK: for.body: -; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body +; CHECK: for.body.prol: +; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ] +; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1 +; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { entry: @@ -39,7 +40,7 @@ for.end: ; preds = %for.body, %entry ; even if the -unroll-runtime is specified ; CHECK: for.body: -; CHECK-NOT: for.body.unr: +; CHECK-NOT: for.body.prol: define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { entry: @@ -85,8 +86,8 @@ cond_true138: ; Test run-time unrolling for a loop that counts down by -2. 
-; CHECK: for.body.unr: -; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body +; CHECK: for.body.prol: +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly { entry: @@ -113,3 +114,7 @@ for.end: ; preds = %for.cond.for.end_cr %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] ret i16 %res.0.lcssa } + +; CHECK: !0 = distinct !{!0, !1} +; CHECK: !1 = !{!"llvm.loop.unroll.disable"} + diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index ad99b8cd9c66..38b4f32354a3 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,11 +1,11 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s ; This tests that setting the unroll count works -; CHECK: unr.cmp: -; CHECK: for.body.unr: +; CHECK: for.body.prol: +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split ; CHECK: for.body: -; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body +; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll index cbc7af58ff5b..176362a34456 100644 --- a/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,10 +1,9 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts -; CHECK: unr.cmp: -; CHECK: for.body.unr: +; CHECK: for.body.prol: ; CHECK: for.body: ; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll index c3086e8335f9..20161d798e9e 100644 --- a/test/Transforms/LoopUnroll/scevunroll.ll +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -66,16 +66,13 @@ exit2: ; SCEV properly unrolls multi-exit loops. ; -; SCEV cannot currently unroll this loop. -; It should ideally detect a trip count of 5. -; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops. 
; CHECK-LABEL: @multiExit( -; CHECKFIXME: getelementptr i32* %base, i32 10 -; CHECKFIXME-NEXT: load i32* -; CHECKFIXME: br i1 false, label %l2.10, label %exit1 -; CHECKFIXME: l2.10: -; CHECKFIXME-NOT: br -; CHECKFIXME: ret i32 +; CHECK: getelementptr i32* %base, i32 10 +; CHECK-NEXT: load i32* +; CHECK: br i1 false, label %l2.10, label %exit1 +; CHECK: l2.10: +; CHECK-NOT: br +; CHECK: ret i32 define i32 @multiExit(i32* %base) nounwind { entry: br label %l1 diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll new file mode 100644 index 000000000000..d59368578ec2 --- /dev/null +++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; When prologue is fully unrolled, the branch on its end is unconditional. +; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow, +; like in this example, where it comes from an argument. +; +; This test is based on an example from here: +; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out +; +; CHECK: while.body.prol: +; CHECK: br i1 +; CHECK: entry.split: + +; Function Attrs: nounwind readnone ssp uwtable +define i32 @foo(i32 %N) #0 { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %i = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %cmp = icmp eq i32 %i, %N + %inc = add i32 %i, 1 + br i1 %cmp, label %while.end, label %while.body + +while.end: ; preds = %while.body + ret i32 %i +} + +attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll index 39da7fa70ade..4f934a688be8 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll @@ -1,19 +1,19 @@ ; RUN: opt < %s -loop-unroll -S | FileCheck %s ; -; Verify that the unrolling pass removes existing loop unrolling metadata +; Verify that the unrolling pass removes existing unroll count metadata ; and adds a disable unrolling node after unrolling is complete. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; #pragma clang loop vectorize(enable) unroll(enable) unroll_count(4) vectorize_width(8) +; #pragma clang loop vectorize(enable) unroll_count(4) vectorize_width(8) ; -; Unroll metadata should be replaces with unroll(disable). Vectorize +; Unroll count metadata should be replaced with unroll(disable). Vectorize ; metadata should be untouched. 
; -; CHECK-LABEL: @loop1( +; CHECK-LABEL: @unroll_count_4( ; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]] -define void @loop1(i32* nocapture %a) { +define void @unroll_count_4(i32* nocapture %a) { entry: br label %for.body @@ -30,19 +30,49 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !5} -!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} -!3 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true} -!4 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} -!5 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} +!1 = !{!1, !2, !3, !4} +!2 = !{!"llvm.loop.vectorize.enable", i1 true} +!3 = !{!"llvm.loop.unroll.count", i32 4} +!4 = !{!"llvm.loop.vectorize.width", i32 8} + +; #pragma clang loop unroll(full) +; +; An unroll disable metadata node is only added for the unroll count case. +; In this case, the loop has a full unroll metadata but can't be fully unrolled +; because the trip count is dynamic. The full unroll metadata should remain +; after unrolling. +; +; CHECK-LABEL: @unroll_full( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]] +define void @unroll_full(i32* nocapture %a, i32 %b) { +entry: + %cmp3 = icmp sgt i32 %b, 0 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %b + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 + +for.end: ; preds = %for.body, %entry + ret void +} +!5 = !{!5, !6} +!6 = !{!"llvm.loop.unroll.full"} ; #pragma clang loop unroll(disable) ; ; Unroll metadata should not change. ; -; CHECK-LABEL: @loop2( -; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]] -define void @loop2(i32* nocapture %a) { +; CHECK-LABEL: @unroll_disable( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]] +define void @unroll_disable(i32* nocapture %a) { entry: br label %for.body @@ -54,16 +84,66 @@ for.body: ; preds = %for.body, %entry store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 64 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 for.end: ; preds = %for.body ret void } -!6 = metadata !{metadata !6, metadata !7} -!7 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} - -; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]} -; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} -; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} -; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} -; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_DISABLE:.*]]} +!7 = !{!7, !8} +!8 = !{!"llvm.loop.unroll.disable"} + +; This function contains two loops which share the same llvm.loop metadata node +; with an llvm.loop.unroll.count 2 hint. 
Both loops should be unrolled. This +; verifies that adding disable metadata to a loop after unrolling doesn't affect +; other loops which previously shared the same llvm.loop metadata. +; +; CHECK-LABEL: @shared_metadata( +; CHECK: store i32 +; CHECK: store i32 +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]] +; CHECK: store i32 +; CHECK: store i32 +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]] +define void @shared_metadata(i32* nocapture %List) #0 { +entry: + br label %for.body3 + +for.body3: ; preds = %for.body3, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %0, 10 + store i32 %add4, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 4 + br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9 + +for.body3.1.preheader: ; preds = %for.body3 + br label %for.body3.1 + +for.body3.1: ; preds = %for.body3.1.preheader, %for.body3.1 + %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ] + %1 = add nsw i64 %indvars.iv.1, 1 + %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1 + %2 = load i32* %arrayidx.1, align 4 + %add4.1 = add nsw i32 %2, 10 + store i32 %add4.1, i32* %arrayidx.1, align 4 + %exitcond.1 = icmp eq i64 %1, 4 + br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9 + +for.inc5.1: ; preds = %for.body3.1 + ret void +} +!9 = !{!9, !10} +!10 = !{!"llvm.loop.unroll.count", i32 2} + + +; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8} +; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"} +; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]} +; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} +; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]} diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index e1b24e44b5ab..d53fa49cc9f5 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -54,12 +54,12 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} +!1 = !{!1, !2} +!2 = !{!"llvm.loop.unroll.disable"} ; loop64 has a high enough count that it should *not* be unrolled by ; the default unrolling heuristic. It serves as the control for the -; unroll(enable) pragma test loop64_with_.* tests below. +; unroll(full) pragma test loop64_with_.* tests below. ; ; CHECK-LABEL: @loop64( ; CHECK: store i32 @@ -83,7 +83,7 @@ for.end: ; preds = %for.body ret void } -; #pragma clang loop unroll(enable) +; #pragma clang loop unroll(full) ; Loop should be fully unrolled. 
; ; CHECK-LABEL: @loop64_with_enable( @@ -105,8 +105,8 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!3 = metadata !{metadata !3, metadata !4} -!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true} +!3 = !{!3, !4} +!4 = !{!"llvm.loop.unroll.full"} ; #pragma clang loop unroll_count(4) ; Loop should be unrolled 4 times. @@ -135,40 +135,10 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!5 = metadata !{metadata !5, metadata !6} -!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} +!5 = !{!5, !6} +!6 = !{!"llvm.loop.unroll.count", i32 4} - -; #pragma clang loop unroll_count(enable) unroll_count(4) -; Loop should be unrolled 4 times. -; -; CHECK-LABEL: @loop64_with_enable_and_count4( -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK-NOT: store i32 -; CHECK: br i1 -define void @loop64_with_enable_and_count4(i32* nocapture %a) { -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %arrayidx, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 64 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 - -for.end: ; preds = %for.body - ret void -} -!7 = metadata !{metadata !7, metadata !6, metadata !4} - -; #pragma clang loop unroll_count(enable) +; #pragma clang loop unroll(full) ; Full unrolling is requested, but loop has a dynamic trip count so ; no unrolling should occur. ; @@ -195,7 +165,7 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!8 = metadata !{metadata !8, metadata !4} +!8 = !{!8, !4} ; #pragma clang loop unroll_count(4) ; Loop has a dynamic trip count. Unrolling should occur, but no @@ -232,7 +202,7 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!9 = metadata !{metadata !9, metadata !6} +!9 = !{!9, !6} ; #pragma clang loop unroll_count(1) ; Loop should not be unrolled @@ -258,10 +228,10 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!10 = metadata !{metadata !10, metadata !11} -!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1} +!10 = !{!10, !11} +!11 = !{!"llvm.loop.unroll.count", i32 1} -; #pragma clang loop unroll(enable) +; #pragma clang loop unroll(full) ; Loop has very high loop count (1 million) and full unrolling was requested. ; Loop should unrolled up to the pragma threshold, but not completely. ; @@ -286,4 +256,4 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!12 = metadata !{metadata !12, metadata !4} +!12 = !{!12, !4} diff --git a/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll new file mode 100644 index 000000000000..adbf47defe8f --- /dev/null +++ b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll @@ -0,0 +1,35 @@ +; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s +; Crasher from PR20987. 
+ +; CHECK: define void @update_loop_info_in_subloops +; CHECK: entry: +; CHECK: L: +; CHECK: L.inner: +; CHECK: L.inner.latch: +; CHECK: L.latch: +; CHECK: L.inner.1: +; CHECK: L.inner.latch.1: +; CHECK: L.latch.1: + +define void @update_loop_info_in_subloops() { +entry: + br label %L + +L: + %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ] + br label %L.inner + +L.inner: + br label %L.inner.latch + +L.inner.latch: + br i1 false, label %L.latch, label %L.inner + +L.latch: + %1 = add i64 %0, 1 + %2 = icmp eq i64 %1, 3 + br i1 %2, label %exit, label %L + +exit: + ret void +} |