summaryrefslogtreecommitdiff
path: root/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/LoopVectorize')
-rw-r--r--test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll26
-rw-r--r--test/Transforms/LoopVectorize/SystemZ/addressing.ll72
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll124
3 files changed, 206 insertions, 16 deletions
diff --git a/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
new file mode 100644
index 000000000000..a7f414b8694b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
@@ -0,0 +1,26 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: all_scalar
+; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions
+;
+define void @all_scalar(i64* %a, i64 %n) { ; writes 0 to %a[0], %a[2], ... until the next index equals %n
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] ; induction variable, starts at 0
+ %tmp0 = getelementptr i64, i64* %a, i64 %i ; address of %a[%i]
+ store i64 0, i64* %tmp0, align 1
+ %i.next = add nuw nsw i64 %i, 2 ; step of 2; this is the instruction named in the expected debug output
+ %cond = icmp eq i64 %i.next, %n ; leave the loop once the next index equals %n
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/test/Transforms/LoopVectorize/SystemZ/addressing.ll
new file mode 100644
index 000000000000..1f7a6d29c57c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/SystemZ/addressing.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \
+; RUN: -instcombine -force-vector-width=2 < %s | FileCheck %s
+;
+; Test that the loop vectorizer does not generate vector addresses that must
+; then always be extracted.
+
+; Check that the addresses for a scalarized memory access are not extracted
+; from a vector register.
+define i32 @foo(i32* nocapture %A) { ; stores 4 to %A[4*i] for i = 0 .. 9999
+;CHECK-LABEL: @foo(
+;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK: %0 = shl nsw i64 %index, 2
+;CHECK: %1 = shl i64 %index, 2
+;CHECK: %2 = or i64 %1, 4
+;CHECK: %3 = getelementptr inbounds i32, i32* %A, i64 %0
+;CHECK: %4 = getelementptr inbounds i32, i32* %A, i64 %2
+;CHECK: store i32 4, i32* %3, align 4
+;CHECK: store i32 4, i32* %4, align 4
+
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
+ %0 = shl nsw i64 %indvars.iv, 2 ; element index = counter * 4
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0 ; address of %A[counter * 4]
+ store i32 4, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; exit test is done on the 32-bit truncation
+ %exitcond = icmp eq i32 %lftr.wideiv, 10000 ; leave after 10000 iterations
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 undef ; returns undef
+}
+
+
+; Check that a load of an address is scalarized.
+define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) { ; %A[i] = *(%PtrPtr[i]) for i = 0 .. 9999
+;CHECK-LABEL: @foo1(
+;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK: %0 = or i64 %index, 1
+;CHECK: %1 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %index
+;CHECK: %2 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %0
+;CHECK: %3 = load i32*, i32** %1, align 8
+;CHECK: %4 = load i32*, i32** %2, align 8
+;CHECK: %5 = load i32, i32* %3, align 4
+;CHECK: %6 = load i32, i32* %4, align 4
+;CHECK: %7 = insertelement <2 x i32> undef, i32 %5, i32 0
+;CHECK: %8 = insertelement <2 x i32> %7, i32 %6, i32 1
+;CHECK: %9 = getelementptr inbounds i32, i32* %A, i64 %index
+;CHECK: %10 = bitcast i32* %9 to <2 x i32>*
+;CHECK: store <2 x i32> %8, <2 x i32>* %10, align 4
+
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
+ %ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv ; address of %PtrPtr[counter]
+ %el = load i32*, i32** %ptr ; the loaded value is itself an address
+ %v = load i32, i32* %el ; second load goes through the pointer just loaded
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv ; address of %A[counter]
+ store i32 %v, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; exit test is done on the 32-bit truncation
+ %exitcond = icmp eq i32 %lftr.wideiv, 10000 ; leave after 10000 iterations
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 undef ; returns undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index b2933c4b56f2..4dc62d86453f 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -11,38 +11,38 @@
; break;
; }
; }
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized
; void test_disabled(int *A, int Length) {
; #pragma clang loop vectorize(disable) interleave(disable)
; for (int i = 0; i < Length; i++)
; A[i] = i;
; }
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; void test_array_bounds(int *A, int *B, int Length) {
; #pragma clang loop vectorize(enable)
; for (int i = 0; i < Length; i++)
; A[i] = A[B[i]];
; }
-
-; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
; CHECK: remark: source.cpp:19:5: loop not vectorized
; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
-; CHECK: _Z4testPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z13test_disabledPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z17test_array_boundsPiS_i
-; CHECK-NOT: x i32>
-; CHECK: ret
+; int foo();
+; int test_multiple_failures(int *A) {
+; int k = 0;
+; #pragma clang loop vectorize(enable) interleave(enable)
+; for (int i = 0; i < 1000; i+=A[i]) {
+; if (A[i])
+; k = foo();
+; }
+; return k;
+; }
+; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select
+; CHECK: remark: source.cpp:27:3: loop not vectorized
; YAML: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
@@ -98,6 +98,41 @@
; YAML-NEXT: - String: 'loop not vectorized: '
; YAML-NEXT: - String: failed explicitly specified loop vectorization
; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NoCFGForSelect
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 7 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: control flow cannot be substituted for a select
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: CantComputeNumberOfIterations
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: could not determine number of loop iterations
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Missed
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: MissedDetails
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: loop not vectorized
+; YAML-NEXT: ...
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -124,6 +159,10 @@ for.end: ; preds = %for.body, %entry
ret void, !dbg !24
}
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
entry:
@@ -144,6 +183,10 @@ for.end: ; preds = %for.body, %entry
ret void, !dbg !31
}
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
entry:
@@ -174,6 +217,45 @@ for.end: ; preds = %for.end.loopexit, %
ret void, !dbg !36
}
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; Function Attrs: nounwind uwtable
+define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 { ; loop whose step is A[i] and whose result k comes from a conditional call
+entry:
+ br label %for.body, !dbg !38
+
+for.body: ; preds = %entry, %for.inc
+ %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; loop index i
+ %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ] ; value of k carried around the loop
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !40
+ %tobool = icmp eq i32 %0, 0, !dbg !40 ; true when A[i] == 0
+ br i1 %tobool, label %for.inc, label %if.then, !dbg !40
+
+if.then: ; preds = %for.body
+ %call = tail call i32 (...) @foo(), !dbg !41 ; only reached when A[i] != 0
+ %.pre = load i32, i32* %arrayidx, align 4 ; A[i] is loaded again after the call
+ br label %for.inc, !dbg !42
+
+for.inc: ; preds = %for.body, %if.then
+ %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43 ; step: reloaded A[i], or 0 when A[i] was 0
+ %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ] ; k = foo() result if called, else unchanged
+ %add = add nsw i32 %1, %i.09, !dbg !44 ; i += step
+ %cmp = icmp slt i32 %add, 1000, !dbg !45 ; continue while i < 1000
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38
+
+for.cond.cleanup: ; preds = %for.inc
+ ret i32 %k.1, !dbg !39 ; k is used outside the loop
+}
+
+declare i32 @foo(...)
+
+; CHECK: test_multiple_failure
+; CHECK-NOT: x i32>
+; CHECK: ret
+
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
@@ -216,3 +298,13 @@ attributes #0 = { nounwind }
!34 = !{!34, !15}
!35 = !DILocation(line: 19, column: 5, scope: !33)
!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2)