summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r-- test/CodeGen/X86/masked_memop.ll 7
-rw-r--r-- test/Transforms/InstMerge/st_sink_bugfix_22613.ll 114
-rw-r--r-- test/Transforms/InstSimplify/load.ll 19
-rw-r--r-- test/Transforms/LoopUnroll/runtime-loop.ll 4
-rw-r--r-- test/Transforms/LoopUnroll/runtime-loop1.ll 2
-rw-r--r-- test/Transforms/LoopUnroll/tripcount-overflow.ll 29
-rw-r--r-- test/Transforms/LoopVectorize/X86/masked_load_store.ll 82
7 files changed, 239 insertions, 18 deletions
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 726d7125a6382..b5ff630f497a4 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -159,7 +159,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
}
; AVX2-LABEL: test15
-; AVX2: vpmaskmovq
+; AVX2: vpmaskmovd
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
@@ -176,8 +176,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
}
; AVX2-LABEL: test17
-; AVX2: vpmaskmovq
-; AVX2: vblendvpd
+; AVX2: vpmaskmovd
+; AVX2: vblendvps
+; AVX2: vpmovsxdq
define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
diff --git a/test/Transforms/InstMerge/st_sink_bugfix_22613.ll b/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
new file mode 100644
index 0000000000000..6b817f20d16bb
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -0,0 +1,114 @@
+; ModuleID = 'bug.c'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+; CHECK-LABEL: main
+; CHECK: if.end
+; CHECK: store
+; CHECK: memset
+; CHECK: if.then
+; CHECK: store
+; CHECK: memset
+
+@d = common global i32 0, align 4
+@b = common global i32 0, align 4
+@f = common global [1 x [3 x i8]] zeroinitializer, align 1
+@e = common global i32 0, align 4
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @fn1() #0 {
+entry:
+ store i32 0, i32* @d, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc8, %entry
+ %0 = load i32* @d, align 4
+ %cmp = icmp slt i32 %0, 2
+ br i1 %cmp, label %for.body, label %for.end10
+
+for.body: ; preds = %for.cond
+ %1 = load i32* @d, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32* @b, align 4
+ %idxprom1 = sext i32 %2 to i64
+ %arrayidx = getelementptr inbounds [1 x [3 x i8]]* @f, i32 0, i64 %idxprom1
+ %arrayidx2 = getelementptr inbounds [3 x i8]* %arrayidx, i32 0, i64 %idxprom
+ store i8 0, i8* %arrayidx2, align 1
+ store i32 0, i32* @e, align 4
+ br label %for.cond3
+
+for.cond3: ; preds = %for.inc, %for.body
+ %3 = load i32* @e, align 4
+ %cmp4 = icmp slt i32 %3, 3
+ br i1 %cmp4, label %for.body5, label %for.end
+
+for.body5: ; preds = %for.cond3
+ %4 = load i32* @c, align 4
+ %tobool = icmp ne i32 %4, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %for.body5
+ %5 = load i32* @a, align 4
+ %dec = add nsw i32 %5, -1
+ store i32 %dec, i32* @a, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body5
+ %6 = load i32* @e, align 4
+ %idxprom6 = sext i32 %6 to i64
+ %arrayidx7 = getelementptr inbounds [3 x i8]* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0), i32 0, i64 %idxprom6
+ store i8 1, i8* %arrayidx7, align 1
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %7 = load i32* @e, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* @e, align 4
+ br label %for.cond3
+
+for.end: ; preds = %for.cond3
+ br label %for.inc8
+
+for.inc8: ; preds = %for.end
+ %8 = load i32* @d, align 4
+ %inc9 = add nsw i32 %8, 1
+ store i32 %inc9, i32* @d, align 4
+ br label %for.cond
+
+for.end10: ; preds = %for.cond
+ ret void
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void @fn1()
+ %0 = load i8* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0, i64 1), align 1
+ %conv = sext i8 %0 to i32
+ %cmp = icmp ne i32 %conv, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void @abort() #2
+ unreachable
+
+if.end: ; preds = %entry
+ ret i32 0
+}
+
+; Function Attrs: noreturn nounwind
+declare void @abort() #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noreturn nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 229288) (llvm/trunk 229286:229290M)"}
diff --git a/test/Transforms/InstSimplify/load.ll b/test/Transforms/InstSimplify/load.ll
new file mode 100644
index 0000000000000..92953cd0ebfbf
--- /dev/null
+++ b/test/Transforms/InstSimplify/load.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+@zeroinit = constant {} zeroinitializer
+@undef = constant {} undef
+
+define i32 @crash_on_zeroinit() {
+; CHECK-LABEL: @crash_on_zeroinit
+; CHECK: ret i32 0
+ %load = load i32* bitcast ({}* @zeroinit to i32*)
+ ret i32 %load
+}
+
+define i32 @crash_on_undef() {
+; CHECK-LABEL: @crash_on_undef
+; CHECK: ret i32 undef
+ %load = load i32* bitcast ({}* @undef to i32*)
+ ret i32 %load
+}
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index 3a8777bb14730..80571ecbd882d 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -4,9 +4,7 @@
; CHECK: %xtraiter = and i32 %n
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
-; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
-; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
+; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
; CHECK: for.body.prol:
; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index 38b4f32354a33..5ff75e33f7f86 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -3,7 +3,7 @@
; This tests that setting the unroll count works
; CHECK: for.body.prol:
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
+; CHECK: br label %for.body.preheader.split
; CHECK: for.body:
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll
index d59368578ec22..052077cdd5dd7 100644
--- a/test/Transforms/LoopUnroll/tripcount-overflow.ll
+++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -1,19 +1,28 @@
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-; When prologue is fully unrolled, the branch on its end is unconditional.
-; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
-; like in this example, where it comes from an argument.
-;
-; This test is based on an example from here:
-; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
-;
+; This test case documents how runtime loop unrolling handles the case
+; when the backedge-count is -1.
+
+; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows
+; and is 0. %xtraiter too is 0, signifying that the total trip-count
+; is divisible by 2. The prologue then branches to the unrolled loop
+; and executes the 2^32 iterations there, in groups of 2.
+
+
+; CHECK: entry:
+; CHECK-NEXT: %0 = add i32 %N, 1
+; CHECK-NEXT: %xtraiter = and i32 %0, 1
+; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
+
; CHECK: while.body.prol:
-; CHECK: br i1
+; CHECK: br label %entry.split
+
; CHECK: entry.split:
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @foo(i32 %N) #0 {
+define i32 @foo(i32 %N) {
entry:
br label %while.body
@@ -26,5 +35,3 @@ while.body: ; preds = %while.body, %entry
while.end: ; preds = %while.body
ret i32 %i
}
-
-attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 0708471e643ad..9e2de8000587f 100644
--- a/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -418,3 +418,85 @@ for.end: ; preds = %for.cond
ret void
}
+; Reverse loop
+;void foo6(double *in, double *out, unsigned size, int *trigger) {
+;
+; for (int i=SIZE-1; i>=0; i--) {
+; if (trigger[i] > 0) {
+; out[i] = in[i] + (double) 0.5;
+; }
+; }
+;}
+;AVX2-LABEL: @foo6
+;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
+;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
+;AVX2: call <4 x double> @llvm.masked.load.v4f64
+;AVX2: fadd <4 x double>
+;AVX2: call void @llvm.masked.store.v4f64
+;AVX2: ret void
+
+;AVX512-LABEL: @foo6
+;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
+;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
+;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: fadd <8 x double>
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+
+define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
+entry:
+ %in.addr = alloca double*, align 8
+ %out.addr = alloca double*, align 8
+ %size.addr = alloca i32, align 4
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store double* %in, double** %in.addr, align 8
+ store double* %out, double** %out.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 4095, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp sge i32 %0, 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %3, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load double** %in.addr, align 8
+ %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+ %6 = load double* %arrayidx3, align 8
+ %add = fadd double %6, 5.000000e-01
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4
+ store double %add, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %9 = load i32* %i, align 4
+ %dec = add nsw i32 %9, -1
+ store i32 %dec, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+