diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:37:50 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:37:50 +0000 |
| commit | 581a6d8501ff5614297da837b81ed3b6956361ea (patch) | |
| tree | 985ee91d0ca1d3e6506ac5ff7e37f5b67adfec09 /test/Transforms | |
| parent | 909545a822eef491158f831688066f0ec2866938 (diff) | |
Notes
Diffstat (limited to 'test/Transforms')
53 files changed, 1035 insertions, 110 deletions
diff --git a/test/Transforms/GVN/assume-equal.ll b/test/Transforms/GVN/assume-equal.ll index d423c1685e1d..941f14ce402c 100644 --- a/test/Transforms/GVN/assume-equal.ll +++ b/test/Transforms/GVN/assume-equal.ll @@ -65,22 +65,20 @@ if.then: ; preds = %entry %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 -; FIXME: those loads could be also direct, but right now the invariant.group -; analysis works only on single block -; CHECK-NOT: call i32 @_ZN1A3fooEv( +; CHECK: call i32 @_ZN1A3fooEv( %callx = tail call i32 %call1(%struct.A* %0) #1 %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 -; CHECK-NOT: call i32 @_ZN1A3fooEv( +; CHECK: call i32 @_ZN1A3fooEv( %cally = tail call i32 %call4(%struct.A* %0) #1 %b = bitcast i8* %call to %struct.A** %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 -; CHECK-NOT: call i32 @_ZN1A3fooEv( +; CHECK: call i32 @_ZN1A3fooEv( %unknown = tail call i32 %vfun(%struct.A* %0) #1 br label %if.end diff --git a/test/Transforms/GVN/invariant.group.ll b/test/Transforms/GVN/invariant.group.ll index d0b32d7f3dd8..6f1f357cad65 100644 --- a/test/Transforms/GVN/invariant.group.ll +++ b/test/Transforms/GVN/invariant.group.ll @@ -392,6 +392,44 @@ define void @testNotGlobal() { ret void } +; CHECK-LABEL: define void @handling_loops() +define void @handling_loops() { + %a = alloca %struct.A, align 8 + %1 = bitcast %struct.A* %a to i8* + %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to 
i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0 + %3 = load i8, i8* @unknownPtr, align 4 + %4 = icmp sgt i8 %3, 0 + br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit + +.lr.ph.i: ; preds = %0 + %5 = bitcast %struct.A* %a to void (%struct.A*)*** + %6 = load i8, i8* @unknownPtr, align 4 + %7 = icmp sgt i8 %6, 1 + br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit + +._crit_edge.preheader: ; preds = %.lr.ph.i + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge + %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ] + %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0 + %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8 + ; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a) + call void %9(%struct.A* nonnull %a) #3 + ; CHECK-NOT: call void % + %10 = add nuw nsw i8 %8, 1 + %11 = load i8, i8* @unknownPtr, align 4 + %12 = icmp slt i8 %10, %11 + br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit + +_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge + br label %_Z2g2R1A.exit + +_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0 + ret void +} + declare void @foo(i8*) declare void @foo2(i8*, i8) diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll index 6b5f5a949530..aee853ae9eeb 100644 --- a/test/Transforms/InstCombine/fabs.ll +++ b/test/Transforms/InstCombine/fabs.ll @@ -5,6 +5,8 @@ declare float @fabsf(float) declare double @fabs(double) declare fp128 @fabsl(fp128) +declare float @llvm.fma.f32(float, float, float) +declare float @llvm.fmuladd.f32(float, float, float) define float @square_fabs_call_f32(float %x) { %mul = fmul float %x, %x @@ -80,7 +82,6 @@ define fp128 @square_fabs_intrinsic_f128(fp128 %x) { ; CHECK-NEXT: ret fp128 %fabsl } -; TODO: This should be able to elimnated the fabs define float @square_nnan_fabs_intrinsic_f32(float %x) { %mul = fmul nnan float %x, %x %fabsf = call float 
@llvm.fabs.f32(float %mul) @@ -88,8 +89,7 @@ define float @square_nnan_fabs_intrinsic_f32(float %x) { ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32( ; CHECK-NEXT: %mul = fmul nnan float %x, %x -; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul) -; CHECK-NEXT: ret float %fabsf +; CHECK-NEXT: ret float %mul } ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization. @@ -170,3 +170,47 @@ define float @fabs_select_var_constant_negative(i32 %c, float %x) { %fabs = call float @llvm.fabs.f32(float %select) ret float %fabs } + +; The fabs cannot be eliminated because %x may be a NaN +define float @square_fma_fabs_intrinsic_f32(float %x) { + %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fma) + ret float %fabsf + +; CHECK-LABEL: @square_fma_fabs_intrinsic_f32( +; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma) +; CHECK-NEXT: ret float %fabsf +} + +; The fabs cannot be eliminated because %x may be a NaN +define float @square_nnan_fma_fabs_intrinsic_f32(float %x) { + %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fma) + ret float %fabsf + +; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32( +; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: ret float %fma +} + +define float @square_fmuladd_fabs_intrinsic_f32(float %x) { + %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fmuladd) + ret float %fabsf + +; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32( +; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd) +; CHECK-NEXT: ret float %fabsf +} + +define float 
@square_nnan_fmuladd_fabs_intrinsic_f32(float %x) { + %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0) + %fabsf = call float @llvm.fabs.f32(float %fmuladd) + ret float %fabsf + +; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32( +; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00) +; CHECK-NEXT: ret float %fmuladd +} diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index 84f24ca0bf24..ad8a9247e4e1 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ b/test/Transforms/InstCombine/fast-math.ll @@ -241,7 +241,7 @@ define float @fmul2(float %f1) { ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses @fmul2_external = external global float define float @fmul2_disable(float %f1) { - %div = fdiv fast float 1.000000e+00, %f1 + %div = fdiv fast float 1.000000e+00, %f1 store float %div, float* @fmul2_external %mul = fmul fast float %div, 2.000000e+00 ret float %mul @@ -672,8 +672,7 @@ define double @sqrt_intrinsic_arg_4th(double %x) { ; CHECK-LABEL: sqrt_intrinsic_arg_4th( ; CHECK-NEXT: %mul = fmul fast double %x, %x -; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul) -; CHECK-NEXT: ret double %fabs +; CHECK-NEXT: ret double %mul } define double @sqrt_intrinsic_arg_5th(double %x) { @@ -685,9 +684,8 @@ define double @sqrt_intrinsic_arg_5th(double %x) { ; CHECK-LABEL: sqrt_intrinsic_arg_5th( ; CHECK-NEXT: %mul = fmul fast double %x, %x -; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul) ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x) -; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1 ; CHECK-NEXT: ret double %1 } diff --git a/test/Transforms/InstCombine/fdiv.ll b/test/Transforms/InstCombine/fdiv.ll index af6a2401a8fc..9a10c4523351 100644 --- a/test/Transforms/InstCombine/fdiv.ll +++ b/test/Transforms/InstCombine/fdiv.ll @@ -49,3 +49,21 
@@ define float @test6(float %x, float %y, float %z) nounwind readnone ssp { ; CHECK-NEXT: fmul fast ; CHECK-NEXT: fdiv fast } + +; CHECK-LABEL @fdiv_fneg_fneg( +; CHECK: %div = fdiv float %x, %y +define float @fdiv_fneg_fneg(float %x, float %y) { + %x.fneg = fsub float -0.0, %x + %y.fneg = fsub float -0.0, %y + %div = fdiv float %x.fneg, %y.fneg + ret float %div +} + +; CHECK-LABEL @fdiv_fneg_fneg_fast( +; CHECK: %div = fdiv fast float %x, %y +define float @fdiv_fneg_fneg_fast(float %x, float %y) { + %x.fneg = fsub float -0.0, %x + %y.fneg = fsub float -0.0, %y + %div = fdiv fast float %x.fneg, %y.fneg + ret float %div +} diff --git a/test/Transforms/InstCombine/pow-4.ll b/test/Transforms/InstCombine/pow-4.ll index 911ab4d94c6a..9293f14cb106 100644 --- a/test/Transforms/InstCombine/pow-4.ll +++ b/test/Transforms/InstCombine/pow-4.ll @@ -10,8 +10,8 @@ declare float @llvm.pow.f32(float, float) define float @test_simplify_4f(float %x) { ; CHECK-LABEL: @test_simplify_4f( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul float %x, %x -; CHECK-NEXT: %2 = fmul float %1, %1 +; CHECK-NEXT: %1 = fmul fast float %x, %x +; CHECK-NEXT: %2 = fmul fast float %1, %1 ; CHECK-NEXT: ret float %2 %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00) ret float %1 @@ -21,8 +21,8 @@ define float @test_simplify_4f(float %x) { define double @test_simplify_3(double %x) { ; CHECK-LABEL: @test_simplify_3( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul double %x, %x -; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: %1 = fmul fast double %x, %x +; CHECK-NEXT: %2 = fmul fast double %1, %x ; CHECK-NEXT: ret double %2 %1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00) ret double %1 @@ -32,8 +32,8 @@ define double @test_simplify_3(double %x) { define double @test_simplify_4(double %x) { ; CHECK-LABEL: @test_simplify_4( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul double %x, %x -; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: %1 = fmul fast double %x, %x +; CHECK-NEXT: %2 = 
fmul fast double %1, %1 ; CHECK-NEXT: ret double %2 %1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00) ret double %1 @@ -43,11 +43,11 @@ define double @test_simplify_4(double %x) { define double @test_simplify_15(double %x) { ; CHECK-LABEL: @test_simplify_15( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul double %x, %x -; CHECK-NEXT: %2 = fmul double %1, %x -; CHECK-NEXT: %3 = fmul double %2, %2 -; CHECK-NEXT: %4 = fmul double %3, %3 -; CHECK-NEXT: %5 = fmul double %2, %4 +; CHECK-NEXT: %1 = fmul fast double %x, %x +; CHECK-NEXT: %2 = fmul fast double %1, %x +; CHECK-NEXT: %3 = fmul fast double %2, %2 +; CHECK-NEXT: %4 = fmul fast double %3, %3 +; CHECK-NEXT: %5 = fmul fast double %2, %4 ; CHECK-NEXT: ret double %5 %1 = call fast double @llvm.pow.f64(double %x, double 1.500000e+01) ret double %1 @@ -57,11 +57,11 @@ define double @test_simplify_15(double %x) { define double @test_simplify_neg_7(double %x) { ; CHECK-LABEL: @test_simplify_neg_7( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul double %x, %x -; CHECK-NEXT: %2 = fmul double %1, %x -; CHECK-NEXT: %3 = fmul double %1, %2 -; CHECK-NEXT: %4 = fmul double %1, %3 -; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4 +; CHECK-NEXT: %1 = fmul fast double %x, %x +; CHECK-NEXT: %2 = fmul fast double %1, %1 +; CHECK-NEXT: %3 = fmul fast double %2, %x +; CHECK-NEXT: %4 = fmul fast double %1, %3 +; CHECK-NEXT: %5 = fdiv fast double 1.000000e+00, %4 ; CHECK-NEXT: ret double %5 %1 = call fast double @llvm.pow.f64(double %x, double -7.000000e+00) ret double %1 @@ -71,13 +71,13 @@ define double @test_simplify_neg_7(double %x) { define double @test_simplify_neg_19(double %x) { ; CHECK-LABEL: @test_simplify_neg_19( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul double %x, %x -; CHECK-NEXT: %2 = fmul double %1, %1 -; CHECK-NEXT: %3 = fmul double %2, %2 -; CHECK-NEXT: %4 = fmul double %3, %3 -; CHECK-NEXT: %5 = fmul double %1, %4 -; CHECK-NEXT: %6 = fmul double %5, %x -; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6 +; 
CHECK-NEXT: %1 = fmul fast double %x, %x +; CHECK-NEXT: %2 = fmul fast double %1, %1 +; CHECK-NEXT: %3 = fmul fast double %2, %2 +; CHECK-NEXT: %4 = fmul fast double %3, %3 +; CHECK-NEXT: %5 = fmul fast double %1, %4 +; CHECK-NEXT: %6 = fmul fast double %5, %x +; CHECK-NEXT: %7 = fdiv fast double 1.000000e+00, %6 ; CHECK-NEXT: ret double %7 %1 = call fast double @llvm.pow.f64(double %x, double -1.900000e+01) ret double %1 @@ -97,11 +97,11 @@ define double @test_simplify_11_23(double %x) { define double @test_simplify_32(double %x) { ; CHECK-LABEL: @test_simplify_32( ; CHECK-NOT: pow -; CHECK-NEXT: %1 = fmul double %x, %x -; CHECK-NEXT: %2 = fmul double %1, %1 -; CHECK-NEXT: %3 = fmul double %2, %2 -; CHECK-NEXT: %4 = fmul double %3, %3 -; CHECK-NEXT: %5 = fmul double %4, %4 +; CHECK-NEXT: %1 = fmul fast double %x, %x +; CHECK-NEXT: %2 = fmul fast double %1, %1 +; CHECK-NEXT: %3 = fmul fast double %2, %2 +; CHECK-NEXT: %4 = fmul fast double %3, %3 +; CHECK-NEXT: %5 = fmul fast double %4, %4 ; CHECK-NEXT: ret double %5 %1 = call fast double @llvm.pow.f64(double %x, double 3.200000e+01) ret double %1 diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll index 1e6166c5f114..52175f1b1247 100644 --- a/test/Transforms/InstCombine/pow-sqrt.ll +++ b/test/Transforms/InstCombine/pow-sqrt.ll @@ -9,5 +9,14 @@ define double @pow_half(double %x) { ; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) ; CHECK-NEXT: ret double %sqrt -declare double @llvm.pow.f64(double, double) +define double @pow_neghalf(double %x) { + %pow = call fast double @llvm.pow.f64(double %x, double -5.000000e-01) + ret double %pow +} +; CHECK-LABEL: define double @pow_neghalf( +; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #0 +; CHECK-NEXT: %sqrtrecip = fdiv fast double 1.000000e+00, %sqrt +; CHECK-NEXT: ret double %sqrtrecip + +declare double @llvm.pow.f64(double, double) diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
b/test/Transforms/InstSimplify/floating-point-arithmetic.ll index 21c9fdde1506..dfdb88dcc858 100644 --- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -103,3 +103,95 @@ define float @PR22688(float %x) { ret float %7 } +declare float @llvm.fabs.f32(float) + +; CHECK-LABEL: @fabs_select_positive_constants( +; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00 +; CHECK-NEXT: ret float %select +define float @fabs_select_positive_constants(i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 1.0, float 2.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_constant_variable( +; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x +; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_constant_variable(i32 %c, float %x) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 1.0, float %x + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_neg0_pos0( +; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +; CHECK-NEXT: ret float %fabs +define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -0.0, float 0.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_neg0_neg1( +; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float -0.0, float -1.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_nan_nan( +; CHECK: %select = select i1 %cmp, 
float 0x7FF8000000000000, float 0x7FF8000100000000 +; CHECK-NEXT: ret float %select +define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_nan( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_negnan( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_negzero( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} + +; CHECK-LABEL: @fabs_select_negnan_zero( +; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00 +; CHECK: %fabs = call float @llvm.fabs.f32(float %select) +define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, float 0xFFF8000000000000, float 
0.0 + %fabs = call float @llvm.fabs.f32(float %select) + ret float %fabs +} diff --git a/test/Transforms/LICM/argmemonly-call.ll b/test/Transforms/LICM/argmemonly-call.ll index 18d7f8351dca..fe7c6af6d6d9 100644 --- a/test/Transforms/LICM/argmemonly-call.ll +++ b/test/Transforms/LICM/argmemonly-call.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -basicaa -licm %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s declare i32 @foo() readonly argmemonly nounwind declare i32 @foo2() readonly nounwind declare i32 @bar(i32* %loc2) readonly argmemonly nounwind diff --git a/test/Transforms/LICM/assume.ll b/test/Transforms/LICM/assume.ll index f6369ac659f0..c8c93ae89b91 100644 --- a/test/Transforms/LICM/assume.ll +++ b/test/Transforms/LICM/assume.ll @@ -1,5 +1,5 @@ ; RUN: opt -licm -basicaa < %s -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s define void @f_0(i1 %p) nounwind ssp { ; CHECK-LABEL: @f_0( diff --git a/test/Transforms/LICM/atomics.ll b/test/Transforms/LICM/atomics.ll index 5dcd4bb8c05a..d23cb49c5486 100644 --- a/test/Transforms/LICM/atomics.ll +++ b/test/Transforms/LICM/atomics.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -basicaa -licm | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s ; Check that we can 
hoist unordered loads define i32 @test1(i32* nocapture %y) nounwind uwtable ssp { diff --git a/test/Transforms/LICM/basictest.ll b/test/Transforms/LICM/basictest.ll index 570e226d2372..78c87ce76517 100644 --- a/test/Transforms/LICM/basictest.ll +++ b/test/Transforms/LICM/basictest.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -licm | llvm-dis -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s | llvm-dis +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s | llvm-dis define void @testfunc(i32 %i) { ; <label>:0 diff --git a/test/Transforms/LICM/constexpr.ll b/test/Transforms/LICM/constexpr.ll index 726246776dc6..8ffc73513600 100644 --- a/test/Transforms/LICM/constexpr.ll +++ b/test/Transforms/LICM/constexpr.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -basicaa -licm | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s ; This fixes PR22460 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll index 75c27b8def0c..93ea2192e03e 100644 --- a/test/Transforms/LICM/crash.ll +++ b/test/Transforms/LICM/crash.ll @@ -1,5 +1,5 @@ ; RUN: opt -licm -disable-output < %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -disable-output < %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -disable-output < %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll index ab77caa2bae0..831a0d8b51f9 100644 --- a/test/Transforms/LICM/debug-value.ll +++ b/test/Transforms/LICM/debug-value.ll @@ -1,5 +1,5 @@ ; RUN: opt -licm -basicaa < %s -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s define void @dgefa() nounwind ssp { entry: diff --git a/test/Transforms/LICM/extra-copies.ll b/test/Transforms/LICM/extra-copies.ll index 84a3bc9ec6a6..2f8e814c15ee 100644 --- a/test/Transforms/LICM/extra-copies.ll +++ b/test/Transforms/LICM/extra-copies.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -licm -S | FileCheck %s -; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s ; PR19835 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/funclet.ll b/test/Transforms/LICM/funclet.ll index 9bdc6dbcde88..6b5f11507ed9 100644 --- a/test/Transforms/LICM/funclet.ll +++ b/test/Transforms/LICM/funclet.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -licm -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s target datalayout = 
"e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i386-pc-windows-msvc18.0.0" diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll index 5752aecde387..6ef00738820e 100644 --- a/test/Transforms/LICM/hoist-bitcast-load.ll +++ b/test/Transforms/LICM/hoist-bitcast-load.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll index ed6ec7694d3c..e67becdeb5e4 100644 --- a/test/Transforms/LICM/hoist-deref-load.ll +++ b/test/Transforms/LICM/hoist-deref-load.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/hoist-nounwind.ll b/test/Transforms/LICM/hoist-nounwind.ll index 081729f808bf..e9720235893a 100644 --- a/test/Transforms/LICM/hoist-nounwind.ll +++ b/test/Transforms/LICM/hoist-nounwind.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' 
-S %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/hoist-round.ll b/test/Transforms/LICM/hoist-round.ll index a87709b810d2..9c6a3a180b50 100644 --- a/test/Transforms/LICM/hoist-round.ll +++ b/test/Transforms/LICM/hoist-round.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32" diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index c61131b476b9..29595b3e1cc0 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -licm -S | FileCheck %s -; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S | FileCheck %s +; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s @X = global i32 0 ; <i32*> [#uses=1] diff --git a/test/Transforms/LICM/lcssa-ssa-promoter.ll b/test/Transforms/LICM/lcssa-ssa-promoter.ll index d466b3baffc8..0644a627f718 100644 --- a/test/Transforms/LICM/lcssa-ssa-promoter.ll +++ b/test/Transforms/LICM/lcssa-ssa-promoter.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s| FileCheck %s +; RUN: 
opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s| FileCheck %s ; ; Manually validate LCSSA form is preserved even after SSAUpdater is used to ; promote things in the loop bodies. diff --git a/test/Transforms/LICM/no-preheader-test.ll b/test/Transforms/LICM/no-preheader-test.ll index 4b6847cdad51..5cfa462dfc4a 100644 --- a/test/Transforms/LICM/no-preheader-test.ll +++ b/test/Transforms/LICM/no-preheader-test.ll @@ -1,6 +1,6 @@ ; Test that LICM works when there is not a loop-preheader ; RUN: opt < %s -licm | llvm-dis -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s | llvm-dis +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s | llvm-dis define void @testfunc(i32 %i.s, i1 %ifcond) { br i1 %ifcond, label %Then, label %Else diff --git a/test/Transforms/LICM/opt-remarks-conditional-load.ll b/test/Transforms/LICM/opt-remarks-conditional-load.ll new file mode 100644 index 000000000000..96bdeaff66ef --- /dev/null +++ b/test/Transforms/LICM/opt-remarks-conditional-load.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -licm -pass-remarks-missed=licm -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' %s -o /dev/null -pass-remarks-missed=licm 2>&1 | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +; With the load from %p conditional, we can't optimize this and the remark +; should tell us about it. 
+ +define void @test(i32* %array, i32* noalias %p) { +Entry: + br label %Loop + +Loop: + %j = phi i32 [ 0, %Entry ], [ %Next, %else] + %addr = getelementptr i32, i32* %array, i32 %j + %a = load i32, i32* %addr + %c = icmp eq i32 %a, 0 + br i1 %c, label %then, label %else + +then: +; CHECK: remark: /tmp/kk.c:2:20: failed to hoist load with loop-invariant address because load is conditionally executed + %b = load i32, i32* %p, !dbg !8 + %a2 = add i32 %a, %b + store i32 %a2, i32* %addr + br label %else + +else: + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 2, column: 20, scope: !6) diff --git a/test/Transforms/LICM/opt-remarks-intervening-store.ll b/test/Transforms/LICM/opt-remarks-intervening-store.ll new file mode 100644 index 000000000000..95389ceaf9a9 --- /dev/null +++ b/test/Transforms/LICM/opt-remarks-intervening-store.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -licm -pass-remarks-missed=licm -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' %s -o /dev/null -pass-remarks-missed=licm 2>&1 | FileCheck %s +target datalayout = 
"E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +; Without the noalias on %p, we can't optimize this and the remark should tell +; us about it. + +define void @test(i32* %array, i32* %p) { +Entry: + br label %Loop + +Loop: + %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] + %addr = getelementptr i32, i32* %array, i32 %j + %a = load i32, i32* %addr +; CHECK: remark: /tmp/kk.c:2:20: failed to move load with loop-invariant address because the loop may invalidate its value + %b = load i32, i32* %p, !dbg !8 + %a2 = add i32 %a, %b + store i32 %a2, i32* %addr + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + ret void +} + +; This illustrates why we need to check loop-invariance before issuing this +; remark. + +define i32 @invalidated_load_with_non_loop_invariant_address(i32* %array, i32* %array2) { +Entry: + br label %Loop + +Loop: + %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] + +; CHECK-NOT: /tmp/kk.c:3:20: {{.*}} loop-invariant + %addr = getelementptr i32, i32* %array, i32 %j + %a = load i32, i32* %addr, !dbg !9 + + %addr2 = getelementptr i32, i32* %array2, i32 %j + store i32 %j, i32* %addr2 + + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + %a2 = phi i32 [ %a, %Loop ] + ret i32 %a2 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: 
!2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 2, column: 20, scope: !6) +!9 = !DILocation(line: 3, column: 20, scope: !6) diff --git a/test/Transforms/LICM/opt-remarks.ll b/test/Transforms/LICM/opt-remarks.ll new file mode 100644 index 000000000000..f0ef386c9f9a --- /dev/null +++ b/test/Transforms/LICM/opt-remarks.ll @@ -0,0 +1,81 @@ +; RUN: opt < %s -licm -pass-remarks=licm -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' %s -o /dev/null -pass-remarks=licm 2>&1 | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define void @hoist(i32* %array, i32* noalias %p) { +Entry: + br label %Loop + +Loop: + %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] + %addr = getelementptr i32, i32* %array, i32 %j + %a = load i32, i32* %addr +; CHECK: remark: /tmp/kk.c:2:20: hosting load + %b = load i32, i32* %p, !dbg !8 + %a2 = add i32 %a, %b + store i32 %a2, i32* %addr + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + ret void +} + +define i32 @sink(i32* %array, i32* noalias %p, i32 %b) { +Entry: + br label %Loop + +Loop: + %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] + %addr = getelementptr i32, i32* %array, i32 %j + %a = load i32, i32* %addr + %a2 = add i32 %a, %b + store i32 %a2, i32* %addr +; CHECK: remark: /tmp/kk.c:2:21: sinking add + %a3 = add i32 %a, 1, !dbg !9 + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + %a4 = phi i32 [ %a3, %Loop ] + ret i32 %a4 +} + +define void @promote(i32* %array, i32* noalias %p) { +Entry: + br label %Loop + +Loop: + %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] + %addr = getelementptr i32, i32* %array, i32 %j + %a = load i32, i32* %addr + %b = load i32, i32* %p + %a2 = add i32 %a, %b + store i32 %a2, i32* %addr +; 
CHECK: remark: /tmp/kk.c:2:22: Moving accesses to memory location out of the loop + store i32 %b, i32* %p, !dbg !10 + %Next = add i32 %j, 1 + %cond = icmp eq i32 %Next, 0 + br i1 %cond, label %Out, label %Loop + +Out: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 2, column: 20, scope: !6) +!9 = !DILocation(line: 2, column: 21, scope: !6) +!10 = !DILocation(line: 2, column: 22, scope: !6) diff --git a/test/Transforms/LICM/preheader-safe.ll b/test/Transforms/LICM/preheader-safe.ll index adc4f4237a29..8f82d1c68bb3 100644 --- a/test/Transforms/LICM/preheader-safe.ll +++ b/test/Transforms/LICM/preheader-safe.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -licm < %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s declare void @use_nothrow(i64 %a) nounwind declare void @use(i64 %a) diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll index 7d87bb221b76..b7e0b7c6c437 100644 --- a/test/Transforms/LICM/promote-order.ll +++ b/test/Transforms/LICM/promote-order.ll @@ -1,5 +1,5 @@ ; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s -; RUN: opt 
-aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s ; LICM should keep the stores in their original order when it sinks/promotes them. ; rdar://12045203 diff --git a/test/Transforms/LICM/promote-tls.ll b/test/Transforms/LICM/promote-tls.ll index 1849afade0e4..076d05cf094a 100644 --- a/test/Transforms/LICM/promote-tls.ll +++ b/test/Transforms/LICM/promote-tls.ll @@ -1,5 +1,5 @@ ; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s -; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s ; If we can prove a local is thread local, we can insert stores during ; promotion which wouldn't be legal otherwise. 
diff --git a/test/Transforms/LICM/scalar-promote-memmodel.ll b/test/Transforms/LICM/scalar-promote-memmodel.ll index ceee7292ac5c..c09c2b361e02 100644 --- a/test/Transforms/LICM/scalar-promote-memmodel.ll +++ b/test/Transforms/LICM/scalar-promote-memmodel.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -basicaa -licm -S | FileCheck %s -; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s ; Make sure we don't hoist a conditionally-executed store out of the loop; ; it would violate the concurrency memory model diff --git a/test/Transforms/LICM/scalar_promote-unwind.ll b/test/Transforms/LICM/scalar_promote-unwind.ll index 22e7e50c22e5..dd3693b4af63 100644 --- a/test/Transforms/LICM/scalar_promote-unwind.ll +++ b/test/Transforms/LICM/scalar_promote-unwind.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -basicaa -licm -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index dc5151be8a82..c88701154b8f 100644 --- a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -basicaa -tbaa -licm -S | FileCheck %s -; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=type-based-aa,basic-aa 
-passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @X = global i32 7 ; <i32*> [#uses=4] diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll index fed1cbaa8555..5d0108b129df 100644 --- a/test/Transforms/LICM/speculate.ll +++ b/test/Transforms/LICM/speculate.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -licm < %s | FileCheck %s -; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s +; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s ; UDiv is safe to speculate if the denominator is known non-zero. diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll index 7836df004c0f..f387012015c7 100644 --- a/test/Transforms/LICM/volatile-alias.ll +++ b/test/Transforms/LICM/volatile-alias.ll @@ -1,5 +1,5 @@ ; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s -; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S | FileCheck %s +; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s ; The objects *p and *q are aliased to each other, but even though *q is ; volatile, *p can be considered invariant in the loop. Check if it is moved ; out of the loop. 
diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll index f6fa8afc56b5..b78ce97fb46a 100644 --- a/test/Transforms/LoopSimplify/preserve-scev.ll +++ b/test/Transforms/LoopSimplify/preserve-scev.ll @@ -1,14 +1,38 @@ -; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep "%cmp = icmp slt i32" | grep "= {%\.ph,+,1}<%for.cond>" -; PR8079 +; RUN: opt -S < %s -analyze -scalar-evolution -loop-simplify -scalar-evolution | FileCheck %s ; Provide legal integer types. target datalayout = "n8:16:32:64" -; LoopSimplify should invalidate indvars when splitting out the -; inner loop. - @maxStat = external global i32 +; LoopSimplify should invalidate SCEV when splitting out the +; inner loop. +; +; First SCEV print: +; CHECK-LABEL: Classifying expressions for: @test +; CHECK: %[[PHI:.*]] = phi i32 [ 0, %entry ], [ %{{.*}}, %if.then5 ], [ %[[PHI]], %if.end ] +; CHECK-LABEL: Determining loop execution counts for: @test +; CHECK: Loop %for.body18: Unpredictable backedge-taken count. +; CHECK: Loop %for.body18: Unpredictable max backedge-taken count. +; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count. +; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %for.cond: Unpredictable max backedge-taken count. +; CHECK: Loop %for.cond: Unpredictable predicated backedge-taken count. +; +; Now simplify the loop, which should cause SCEV to re-compute more precise +; info here in addition to having preheader PHIs. Second SCEV print: +; CHECK-LABEL: Classifying expressions for: @test +; CHECK: phi i32 [ %{{.*}}, %if.then5 ], [ 0, %entry ] +; CHECK-LABEL: Determining loop execution counts for: @test +; CHECK: Loop %for.body18: Unpredictable backedge-taken count. +; CHECK: Loop %for.body18: Unpredictable max backedge-taken count. +; CHECK: Loop %for.body18: Unpredictable predicated backedge-taken count. 
+; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %for.cond: max backedge-taken count is -2147483647 +; CHECK: Loop %for.cond: Unpredictable predicated backedge-taken count. +; CHECK: Loop %for.cond.outer: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %for.cond.outer: Unpredictable max backedge-taken count. +; CHECK: Loop %for.cond.outer: Unpredictable predicated backedge-taken count. define i32 @test() nounwind { entry: br label %for.cond @@ -52,12 +76,27 @@ return: ; preds = %for.body18, %for.bo declare void @foo() nounwind -; Notify SCEV when removing an ExitingBlock. -; CHECK-LABEL: @mergeExit( -; CHECK: while.cond191: -; CHECK: br i1 %or.cond, label %while.body197 -; CHECK-NOT: land.rhs: -; CHECK: ret +; Notify SCEV when removing an ExitingBlock. This only changes the +; backedge-taken information. +; +; First SCEV print: +; CHECK-LABEL: Determining loop execution counts for: @mergeExit +; CHECK: Loop %while.cond191: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %while.cond191: max backedge-taken count is -1 +; CHECK: Loop %while.cond191: Unpredictable predicated backedge-taken count. +; CHECK: Loop %while.cond191.outer: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %while.cond191.outer: Unpredictable max backedge-taken count. +; CHECK: Loop %while.cond191.outer: Unpredictable predicated backedge-taken count. +; +; After simplifying, the max backedge count is refined. +; Second SCEV print: +; CHECK-LABEL: Determining loop execution counts for: @mergeExit +; CHECK: Loop %while.cond191: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %while.cond191: max backedge-taken count is 0 +; CHECK: Loop %while.cond191: Unpredictable predicated backedge-taken count. +; CHECK: Loop %while.cond191.outer: <multiple exits> Unpredictable backedge-taken count. +; CHECK: Loop %while.cond191.outer: Unpredictable max backedge-taken count. 
+; CHECK: Loop %while.cond191.outer: Unpredictable predicated backedge-taken count. define void @mergeExit(i32 %MapAttrCount) nounwind uwtable ssp { entry: br i1 undef, label %if.then124, label %if.end126 diff --git a/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/test/Transforms/LoopUnroll/peel-loop-pgo.ll index 18309b0691fa..a87d5643e7e9 100644 --- a/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -3,7 +3,12 @@ ; Make sure we use the profile information correctly to peel-off 3 iterations ; from the loop, and update the branch weights for the peeled loop properly. + +; CHECK: Loop Unroll: F[basic] ; CHECK: PEELING loop %for.body with iteration count 3! +; CHECK: Loop Unroll: F[optsize] +; CHECK-NOT: PEELING + ; CHECK-LABEL: @basic ; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !1 ; CHECK: [[NEXT0]]: @@ -37,6 +42,40 @@ for.end: ; preds = %for.cond.for.end_cr ret void } +; We don't want to peel loops when optimizing for size. 
+; CHECK-LABEL: @optsize +; CHECK: for.body.lr.ph: +; CHECK-NEXT: br label %for.body +; CHECK: for.body: +; CHECK-NOT: br +; CHECK: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge +define void @optsize(i32* %p, i32 %k) #1 !prof !0 { +entry: + %cmp3 = icmp slt i32 0, %k + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1 + store i32 %i.05, i32* %p.addr.04, align 4 + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !1 + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind optsize } + !0 = !{!"function_entry_count", i64 1} !1 = !{!"branch_weights", i32 3001, i32 1001} diff --git a/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll b/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll new file mode 100644 index 000000000000..ad79e38cafa0 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll @@ -0,0 +1,145 @@ +; REQUIRES: asserts +; RUN: opt < %s -S -debug -loop-vectorize -mcpu=slm 2>&1 | FileCheck %s --check-prefix=SLM + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i8 @mul_i8(i8* %dataA, i8* %dataB, i32 %N) { +entry: + %cmp12 = icmp eq i32 %N, 0 + br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + %phitmp = trunc i32 %add4 to i8 + br label %for.cond.cleanup + 
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %acc.0.lcssa = phi i8 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ] + ret i8 %acc.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i8, i8* %dataA, i64 %indvars.iv + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + %arrayidx2 = getelementptr inbounds i8, i8* %dataB, i64 %indvars.iv + %1 = load i8, i8* %arrayidx2, align 1 + %conv3 = sext i8 %1 to i32 +; sources of the mul is sext\sext from i8 +; use pmullw\sext seq. +; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 + %mul = mul nsw i32 %conv3, %conv +; sources of the mul is zext\sext from i8 +; use pmulhw\pmullw\pshuf +; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %conv4 = zext i8 %1 to i32 + %mul2 = mul nsw i32 %conv4, %conv + %sum0 = add i32 %mul, %mul2 +; sources of the mul is zext\zext from i8 +; use pmullw\zext +; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 + %conv5 = zext i8 %0 to i32 + %mul3 = mul nsw i32 %conv5, %conv4 + %sum1 = add i32 %sum0, %mul3 +; sources of the mul is sext\-120 +; use pmullw\sext +; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 + %mul4 = mul nsw i32 -120, %conv3 + %sum2 = add i32 %sum1, %mul4 +; sources of the mul is sext\250 +; use pmulhw\pmullw\pshuf +; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %mul5 = mul nsw i32 250, %conv3 + %sum3 = add i32 %sum2, %mul5 +; sources of the mul is zext\-120 +; use pmulhw\pmullw\pshuf +; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %mul6 = mul nsw i32 -120, %conv4 + %sum4 = add i32 %sum3, %mul6 +; sources of the mul is zext\250 +; use pmullw\zext +; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 + %mul7 = mul nsw i32 250, %conv4 + %sum5 = add i32 %sum4, %mul7 + %add = add i32 %acc.013, 5 + %add4 = add i32 %add, %sum5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +define i16 @mul_i16(i16* %dataA, i16* %dataB, i32 %N) { +entry: + %cmp12 = icmp eq i32 %N, 0 + br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + %phitmp = trunc i32 %add4 to i16 + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %acc.0.lcssa = phi i16 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ] + ret i16 %acc.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i16, i16* %dataA, i64 %indvars.iv + %0 = load i16, i16* %arrayidx, align 1 + %conv = sext i16 %0 to i32 + %arrayidx2 = getelementptr inbounds i16, i16* %dataB, i64 %indvars.iv + %1 = load i16, i16* %arrayidx2, align 1 + %conv3 = sext i16 %1 to i32 +; sources of the mul is sext\sext from i16 +; use pmulhw\pmullw\pshuf seq. 
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %mul = mul nsw i32 %conv3, %conv +; sources of the mul is zext\sext from i16 +; use pmulld +; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32 + %conv4 = zext i16 %1 to i32 + %mul2 = mul nsw i32 %conv4, %conv + %sum0 = add i32 %mul, %mul2 +; sources of the mul is zext\zext from i16 +; use pmulhw\pmullw\zext +; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %conv5 = zext i16 %0 to i32 + %mul3 = mul nsw i32 %conv5, %conv4 + %sum1 = add i32 %sum0, %mul3 +; sources of the mul is sext\-32000 +; use pmulhw\pmullw\sext +; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %mul4 = mul nsw i32 -32000, %conv3 + %sum2 = add i32 %sum1, %mul4 +; sources of the mul is sext\64000 +; use pmulld +; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32 + %mul5 = mul nsw i32 64000, %conv3 + %sum3 = add i32 %sum2, %mul5 +; sources of the mul is zext\-32000 +; use pmulld +; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32 + %mul6 = mul nsw i32 -32000, %conv4 + %sum4 = add i32 %sum3, %mul6 +; sources of the mul is zext\64000 +; use pmulhw\pmullw\zext +; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 + %mul7 = mul nsw i32 250, %conv4 + %sum5 = add i32 %sum4, %mul7 + %add = add i32 %acc.013, 5 + %add4 = add i32 %add, %sum5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + + diff --git a/test/Transforms/LoopVectorize/pr31190.ll b/test/Transforms/LoopVectorize/pr31190.ll new file mode 100644 index 000000000000..afb1754983cd --- /dev/null +++ b/test/Transforms/LoopVectorize/pr31190.ll @@ -0,0 +1,64 @@ +; RUN: opt -passes='loop-vectorize' -debug -S < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; This checks we don't crash when the inner loop we're trying to vectorize +; is a SCEV AddRec with respect to an outer loop. 
+ +; In this case, the problematic PHI is: +; %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ] +; Since %inc54 is the IV of the outer loop, and %0 equivalent to it, +; we get the situation described above. + +; This test uses the new PM, because with the old PM, running loop-vectorize +; would explicitly run loop-simplify. Even though this loop is already in +; simplified form, loop-simplify would still clean up the phi. +; The reason this matters is that in a real optimizer pipeline, LICM can create +; such PHIs, and since it preserves loop simplified form, the cleanup has +; no chance to run. + +; Code that leads to this situation can look something like: +; +; int a, b[1], c; +; void fn1 () +; { +; for (; c; c++) +; for (a = 0; a; a++) +; b[c] = 4; +; } +; +; The PHI is an artifact of the register promotion of c. + +@c = external global i32, align 4 +@a = external global i32, align 4 +@b = external global [1 x i32], align 4 + +; CHECK: LV: PHI is a recurrence with respect to an outer loop. +; CHECK: LV: Not vectorizing: Cannot prove legality. 
+; CHECK-LABEL: @test +define void @test() { +entry: + %a.promoted2 = load i32, i32* @a, align 1 + %c.promoted = load i32, i32* @c, align 1 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.for.inc4_crit_edge, %entry + %inc54 = phi i32 [ %inc5, %for.cond1.for.inc4_crit_edge ], [ %c.promoted, %entry ] + %inc.lcssa3 = phi i32 [ %inc.lcssa, %for.cond1.for.inc4_crit_edge ], [ %a.promoted2, %entry ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %inc1 = phi i32 [ %inc.lcssa3, %for.cond1.preheader ], [ %inc, %for.body3 ] + %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ] + %idxprom = sext i32 %0 to i64 + %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @b, i64 0, i64 %idxprom + store i32 4, i32* %arrayidx, align 4 + %inc = add nsw i32 %inc1, 1 + %tobool2 = icmp eq i32 %inc, 0 + br i1 %tobool2, label %for.cond1.for.inc4_crit_edge, label %for.body3 + +for.cond1.for.inc4_crit_edge: ; preds = %for.body3 + %inc.lcssa = phi i32 [ %inc, %for.body3 ] + %.lcssa = phi i32 [ %inc54, %for.body3 ] + %inc5 = add nsw i32 %.lcssa, 1 + br label %for.cond1.preheader +} diff --git a/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml b/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml index d2a3ef81a3a4..b7a1d208fc6f 100644 --- a/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml +++ b/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml @@ -6,5 +6,5 @@ TypeIdMap: typeid1: TTRes: Kind: Unsat - SizeBitWidth: 0 + SizeM1BitWidth: 0 ... 
diff --git a/test/Transforms/LowerTypeTests/function.ll b/test/Transforms/LowerTypeTests/function.ll index 9abea8f854c1..759041fea6f1 100644 --- a/test/Transforms/LowerTypeTests/function.ll +++ b/test/Transforms/LowerTypeTests/function.ll @@ -43,7 +43,7 @@ declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone define i1 @foo(i8* %p) { ; NATIVE: sub i64 {{.*}}, ptrtoint (void ()* @[[JT]] to i64) ; WASM32: sub i64 {{.*}}, ptrtoint (i8* getelementptr (i8, i8* null, i64 1) to i64) - ; WASM32: icmp ult i64 {{.*}}, 2 + ; WASM32: icmp ule i64 {{.*}}, 1 %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1") ret i1 %x } diff --git a/test/Transforms/LowerTypeTests/import-unsat.ll b/test/Transforms/LowerTypeTests/import-unsat.ll index 7ca70f2636fd..7410bc4b4d88 100644 --- a/test/Transforms/LowerTypeTests/import-unsat.ll +++ b/test/Transforms/LowerTypeTests/import-unsat.ll @@ -10,7 +10,7 @@ ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: ; SUMMARY-NEXT: Kind: Unsat -; SUMMARY-NEXT: SizeBitWidth: 0 +; SUMMARY-NEXT: SizeM1BitWidth: 0 target datalayout = "e-p:32:32" diff --git a/test/Transforms/LowerTypeTests/simple.ll b/test/Transforms/LowerTypeTests/simple.ll index 91b94184420b..cedfcb4a63a0 100644 --- a/test/Transforms/LowerTypeTests/simple.ll +++ b/test/Transforms/LowerTypeTests/simple.ll @@ -69,7 +69,7 @@ define i1 @foo(i32* %p) { ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2 ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30 ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]] - ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 68 + ; CHECK: [[R6:%[^ ]*]] = icmp ule i32 [[R5]], 67 ; CHECK: br i1 [[R6]] ; CHECK: [[R8:%[^ ]*]] = getelementptr i8, i8* @bits_use.{{[0-9]*}}, i32 [[R5]] @@ -96,7 +96,7 @@ define i1 @bar(i32* %p) { ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 8 ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 24 ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]] - ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2 + ; CHECK: [[S6:%[^ ]*]] = icmp ule i32 [[S5]], 1 %x = call i1 
@llvm.type.test(i8* %pi8, metadata !"typeid2") ; CHECK: ret i1 [[S6]] @@ -112,7 +112,7 @@ define i1 @baz(i32* %p) { ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2 ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30 ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]] - ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66 + ; CHECK: [[T6:%[^ ]*]] = icmp ule i32 [[T5]], 65 ; CHECK: br i1 [[T6]] ; CHECK: [[T8:%[^ ]*]] = getelementptr i8, i8* @bits_use{{(\.[0-9]*)?}}, i32 [[T5]] diff --git a/test/Transforms/NewGVN/assume-equal.ll b/test/Transforms/NewGVN/assume-equal.ll index b6c2a7afb294..7e009192064a 100644 --- a/test/Transforms/NewGVN/assume-equal.ll +++ b/test/Transforms/NewGVN/assume-equal.ll @@ -66,22 +66,20 @@ if.then: ; preds = %entry %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 -; FIXME: those loads could be also direct, but right now the invariant.group -; analysis works only on single block -; CHECK-NOT: call i32 @_ZN1A3fooEv( +; CHECK: call i32 @_ZN1A3fooEv( %callx = tail call i32 %call1(%struct.A* %0) #1 %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 -; CHECK-NOT: call i32 @_ZN1A3fooEv( +; CHECK: call i32 @_ZN1A3fooEv( %cally = tail call i32 %call4(%struct.A* %0) #1 %b = bitcast i8* %call to %struct.A** %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 -; CHECK-NOT: call i32 @_ZN1A3fooEv( +; CHECK: call i32 @_ZN1A3fooEv( %unknown = tail call i32 %vfun(%struct.A* %0) #1 br label %if.end diff --git a/test/Transforms/NewGVN/invariant.group.ll b/test/Transforms/NewGVN/invariant.group.ll index 80c6e05a8e24..c421df6bd3b1 100644 
--- a/test/Transforms/NewGVN/invariant.group.ll +++ b/test/Transforms/NewGVN/invariant.group.ll @@ -393,6 +393,45 @@ define void @testNotGlobal() { ret void } +; CHECK-LABEL: define void @handling_loops() +define void @handling_loops() { + %a = alloca %struct.A, align 8 + %1 = bitcast %struct.A* %a to i8* + %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0 + %3 = load i8, i8* @unknownPtr, align 4 + %4 = icmp sgt i8 %3, 0 + br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit + +.lr.ph.i: ; preds = %0 + %5 = bitcast %struct.A* %a to void (%struct.A*)*** + %6 = load i8, i8* @unknownPtr, align 4 + %7 = icmp sgt i8 %6, 1 + br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit + +._crit_edge.preheader: ; preds = %.lr.ph.i + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge + %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ] + %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0 + %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8 +; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a) + call void %9(%struct.A* nonnull %a) #3 + +; CHECK-NOT: call void % + %10 = add nuw nsw i8 %8, 1 + %11 = load i8, i8* @unknownPtr, align 4 + %12 = icmp slt i8 %10, %11 + br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit + +_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge + br label %_Z2g2R1A.exit + +_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0 + ret void +} + declare void @foo(i8*) declare void @foo2(i8*, i8) diff --git a/test/Transforms/NewGVN/pr31594.ll b/test/Transforms/NewGVN/pr31594.ll new file mode 100644 index 000000000000..0cdac1a7fff4 --- /dev/null +++ b/test/Transforms/NewGVN/pr31594.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @patatino(i8* %blah, i32 %choice) { +; CHECK-LABEL: @patatino( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[FOO:%.*]] = phi i8* [ [[BLAH:%.*]], [[ENTRY:%.*]] ], [ null, [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: switch i32 [[CHOICE:%.*]], label [[WHILE_BODY]] [ +; CHECK-NEXT: i32 -1, label [[WHILE_END:%.*]] +; CHECK-NEXT: i32 40, label [[LAND_END:%.*]] +; CHECK-NEXT: ] +; CHECK: land.end: +; CHECK-NEXT: br label [[WHILE_END]] +; CHECK: while.body: +; CHECK-NEXT: br label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: store i8 0, i8* [[FOO]], align 1 +; CHECK-NEXT: store i8 0, i8* [[BLAH]], align 1 +; CHECK-NEXT: ret void +; +entry: + br label %while.cond + +while.cond: + %foo = phi i8* [ %blah, %entry ], [ null, %while.body ] + switch i32 %choice, label %while.body [ + i32 -1, label %while.end + i32 40, label %land.end + ] + +land.end: + br label %while.end + +while.body: + br label %while.cond + +while.end: + %foo.lcssa = phi i8* [ %foo, %land.end ], [ %foo, %while.cond ] +;; These two stores will initially be considered equivalent, but then proven not. +;; the second store would previously end up deciding it's equivalent to a previous +;; store, but it was really just finding an optimistic version of itself +;; in the congruence class. + store i8 0, i8* %foo.lcssa, align 1 + %0 = load i8, i8* %blah, align 1 + %loaded = icmp eq i8 %0, 0 + store i8 0, i8* %blah, align 1 + ret void +} + + +;; This is an example of a case where the memory states are equivalent solely due to unreachability, +;; but the stores are not equal. 
+define void @foo(i8* %arg) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP:%.*]] = phi i8* [ [[ARG:%.*]], [[BB:%.*]] ], [ null, [[BB2:%.*]] ] +; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb3: +; CHECK-NEXT: store i8 0, i8* [[TMP]], !g !0 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br label [[BB6:%.*]] +; CHECK: bb6: +; CHECK-NEXT: br i1 undef, label [[BB9:%.*]], label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: switch i8 0, label [[BB6]] [ +; CHECK-NEXT: i8 6, label [[BB8:%.*]] +; CHECK-NEXT: ] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb9: +; CHECK-NEXT: store i8 0, i8* [[ARG]], !g !0 +; CHECK-NEXT: unreachable +; +bb: + br label %bb1 + +bb1: ; preds = %bb2, %bb + %tmp = phi i8* [ %arg, %bb ], [ null, %bb2 ] + br i1 undef, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + br label %bb1 + +bb3: ; preds = %bb1 + store i8 0, i8* %tmp, !g !0 + br label %bb4 + +bb4: ; preds = %bb8, %bb3 + %tmp5 = phi i8* [ null, %bb8 ], [ %arg, %bb3 ] + br label %bb6 + +bb6: ; preds = %bb7, %bb4 + br i1 undef, label %bb9, label %bb7 + +bb7: ; preds = %bb6 + switch i8 0, label %bb6 [ + i8 6, label %bb8 + ] + +bb8: ; preds = %bb7 + store i8 undef, i8* %tmp5, !g !0 + br label %bb4 + +bb9: ; preds = %bb6 + %tmp10 = phi i8* [ %tmp5, %bb6 ] + store i8 0, i8* %tmp10, !g !0 + unreachable +} + +!0 = !{} diff --git a/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext b/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext new file mode 100644 index 000000000000..5bf67fb2bfaf --- /dev/null +++ b/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext @@ -0,0 +1,36 @@ +# IR level Instrumentation Flag +:ir +_Z3fooi +# Func Hash: +72057606922829823 +# Num Counters: +2 +# Counter Values: +18 +12 + +_Z3fooi +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +0 + +_Z3bari +# Func 
Hash: +72057606922829823 +# Num Counters: +2 +# Counter Values: +0 +0 + +_Z4m2f1v +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +1 + diff --git a/test/Transforms/PGOProfile/comdat_internal.ll b/test/Transforms/PGOProfile/comdat_internal.ll index 25dafbea1035..7df6f91fe729 100644 --- a/test/Transforms/PGOProfile/comdat_internal.ll +++ b/test/Transforms/PGOProfile/comdat_internal.ll @@ -4,17 +4,17 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" $foo = comdat any -; CHECK: $foo.[[FOO_HASH:[0-9]+]] = comdat any +; CHECK: $foo = comdat any ; CHECK: $__llvm_profile_raw_version = comdat any -; CHECK: $__profv__stdin__foo.[[FOO_HASH]] = comdat any +; CHECK: $__profv__stdin__foo.[[FOO_HASH:[0-9]+]] = comdat any @bar = global i32 ()* @foo, align 8 ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat -; CHECK: @__profn__stdin__foo.[[FOO_HASH]] = private constant [23 x i8] c"<stdin>:foo.[[FOO_HASH]]" +; CHECK: @__profn__stdin__foo = private constant [11 x i8] c"<stdin>:foo" ; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 -; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6965568665848889497, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null +; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) ; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 ; CHECK: @__llvm_prf_nm diff --git 
a/test/Transforms/PGOProfile/comdat_rename.ll b/test/Transforms/PGOProfile/comdat_rename.ll index b69c802093b4..eb9ddb4a1cea 100644 --- a/test/Transforms/PGOProfile/comdat_rename.ll +++ b/test/Transforms/PGOProfile/comdat_rename.ll @@ -1,7 +1,7 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux -pgo-instr-gen -S | FileCheck --check-prefixes COMMON,ELFONLY %s -; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=pgo-instr-gen -S | FileCheck --check-prefixes COMMON,ELFONLY %s -; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -pgo-instr-gen -S | FileCheck --check-prefixes COMMON,COFFONLY %s -; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=pgo-instr-gen -S | FileCheck --check-prefixes COMMON,COFFONLY %s +; RUN: opt < %s -mtriple=x86_64-unknown-linux -pgo-instr-gen -do-comdat-renaming=true -S | FileCheck --check-prefixes COMMON,ELFONLY %s +; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=pgo-instr-gen -do-comdat-renaming=true -S | FileCheck --check-prefixes COMMON,ELFONLY %s +; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -pgo-instr-gen -do-comdat-renaming=true -S | FileCheck --check-prefixes COMMON,COFFONLY %s +; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=pgo-instr-gen -do-comdat-renaming=true -S | FileCheck --check-prefixes COMMON,COFFONLY %s ; Rename Comdat group and its function. $f = comdat any @@ -38,22 +38,10 @@ define linkonce void @tf2() comdat($tf) { ret void } -; Renaming Comdat with aliases. 
-$f_with_alias = comdat any -; COMMON: $f_with_alias.[[SINGLEBB_HASH]] = comdat any -@af = alias void (...), bitcast (void ()* @f_with_alias to void (...)*) -; COFFONLY: @af.[[SINGLEBB_HASH]] = alias void (...), bitcast (void ()* @f_with_alias.[[SINGLEBB_HASH]] to -; ELFONLY-DAG: @af.[[SINGLEBB_HASH]] = alias void (...), bitcast (void ()* @f_with_alias.[[SINGLEBB_HASH]] to -define linkonce_odr void @f_with_alias() comdat($f_with_alias) { - ret void -} - ; Rename AvailableExternallyLinkage functions ; ELFONLY-DAG: $aef.[[SINGLEBB_HASH]] = comdat any ; ELFONLY: @f = weak alias void (), void ()* @f.[[SINGLEBB_HASH]] -; ELFONLY: @f_with_alias = weak alias void (), void ()* @f_with_alias.[[SINGLEBB_HASH]] -; ELFONLY: @af = weak alias void (...), void (...)* @af.[[SINGLEBB_HASH]] ; ELFONLY: @aef = weak alias void (), void ()* @aef.[[SINGLEBB_HASH]] define available_externally void @aef() { diff --git a/test/Transforms/PGOProfile/indirect_call_profile.ll b/test/Transforms/PGOProfile/indirect_call_profile.ll index 409c29ef8728..e1f499c08a7b 100644 --- a/test/Transforms/PGOProfile/indirect_call_profile.ll +++ b/test/Transforms/PGOProfile/indirect_call_profile.ll @@ -54,7 +54,7 @@ bb11: ; preds = %bb2 } ; Test that comdat function's address is recorded. 
-; LOWER: @__profd_foo3.[[FOO3_HASH:[0-9]+]] = linkonce_odr{{.*}}@foo3.[[FOO3_HASH]] +; LOWER: @__profd_foo3.[[FOO3_HASH:[0-9]+]] = linkonce_odr{{.*}}@__profc_foo3.[[FOO3_HASH]] ; Function Attrs: nounwind uwtable define linkonce_odr i32 @foo3() comdat { ret i32 1 diff --git a/test/Transforms/PGOProfile/multiple_hash_profile.ll b/test/Transforms/PGOProfile/multiple_hash_profile.ll new file mode 100644 index 000000000000..f4041830f8f8 --- /dev/null +++ b/test/Transforms/PGOProfile/multiple_hash_profile.ll @@ -0,0 +1,36 @@ +; RUN: llvm-profdata merge %S/Inputs/multiple_hash_profile.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$_Z3fooi = comdat any + +@g2 = local_unnamed_addr global i32 (i32)* null, align 8 + +define i32 @_Z3bari(i32 %i) { +entry: + %cmp = icmp sgt i32 %i, 2 + %mul = select i1 %cmp, i32 1, i32 %i + %retval.0 = mul nsw i32 %mul, %i + ret i32 %retval.0 +} + +define void @_Z4m2f1v() { +entry: + store i32 (i32)* @_Z3fooi, i32 (i32)** @g2, align 8 + ret void +} + +define linkonce_odr i32 @_Z3fooi(i32 %i) comdat { +entry: + %cmp.i = icmp sgt i32 %i, 2 + %mul.i = select i1 %cmp.i, i32 1, i32 %i +; CHECK: %mul.i = select i1 %cmp.i, i32 1, i32 %i +; CHECK-SAME: !prof ![[BW:[0-9]+]] +; CHECK: ![[BW]] = !{!"branch_weights", i32 12, i32 6} + %retval.0.i = mul nsw i32 %mul.i, %i + ret i32 %retval.0.i +} + + diff --git a/test/Transforms/SLPVectorizer/X86/pr31599.ll b/test/Transforms/SLPVectorizer/X86/pr31599.ll new file mode 100644 index 000000000000..64e0f7be7e2e --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/pr31599.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +
+define <2 x float> @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SOURCE:%.*]] = insertelement <2 x float> undef, float undef, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x float> [[SOURCE]], [[SOURCE]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[RES2:%.*]] = insertelement <2 x float> [[RES1]], float [[TMP2]], i32 1 +; CHECK-NEXT: ret <2 x float> [[RES2]] +; +entry: + %source = insertelement <2 x float> undef, float undef, i32 0 + %e0 = extractelement <2 x float> %source, i32 0 + %e0.dup = extractelement <2 x float> %source, i32 0 + %sub1 = fsub float %e0, %e0.dup + %e1 = extractelement <2 x float> %source, i32 1 + %e1.dup = extractelement <2 x float> %source, i32 1 + %sub2 = fsub float %e1, %e1.dup + %res1 = insertelement <2 x float> undef, float %sub1, i32 0 + %res2 = insertelement <2 x float> %res1, float %sub2, i32 1 + ret <2 x float> %res2 +} + +!llvm.ident = !{!0, !0} + +!0 = !{!"clang version 4.0.0 "} diff --git a/test/Transforms/StructurizeCFG/no-branch-to-entry.ll b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll index 2e22c8715347..1db1060ca821 100644 --- a/test/Transforms/StructurizeCFG/no-branch-to-entry.ll +++ b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s +; RUN: opt -S -o - -structurizecfg -verify-dom-info < %s | FileCheck %s ; CHECK-LABEL: @no_branch_to_entry_undef( ; CHECK: entry: |
