summaryrefslogtreecommitdiff
path: root/test/CodeGen/AArch64
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r--test/CodeGen/AArch64/arm64-memset-inline.ll4
-rw-r--r--test/CodeGen/AArch64/cmp-frameindex.ll19
-rw-r--r--test/CodeGen/AArch64/falkor-hwpf-fix.mir302
-rw-r--r--test/CodeGen/AArch64/fastcc.ll12
-rw-r--r--test/CodeGen/AArch64/ldst-opt.ll15
-rw-r--r--test/CodeGen/AArch64/thread-pointer.ll60
6 files changed, 335 insertions, 77 deletions
diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll
index 384aaa8541df2..8c872cc615001 100644
--- a/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -12,9 +12,9 @@ entry:
define void @t2() nounwind ssp {
entry:
; CHECK-LABEL: t2:
+; CHECK: stp xzr, xzr, [sp, #16]
; CHECK: strh wzr, [sp, #32]
-; CHECK: stp xzr, xzr, [sp, #8]
-; CHECK: str xzr, [sp, #24]
+; CHECK: str xzr, [sp, #8]
%buf = alloca [26 x i8], align 1
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
diff --git a/test/CodeGen/AArch64/cmp-frameindex.ll b/test/CodeGen/AArch64/cmp-frameindex.ll
new file mode 100644
index 0000000000000..2d01b76e186c4
--- /dev/null
+++ b/test/CodeGen/AArch64/cmp-frameindex.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+
+; CHECK: test_frameindex_cmp:
+; CHECK: cmn sp, #{{[0-9]+}}
+define void @test_frameindex_cmp() {
+ %stack = alloca i8
+ %stack.int = ptrtoint i8* %stack to i64
+ %cmp = icmp ne i64 %stack.int, 0
+ br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+ call void @bar()
+ ret void
+
+bb2:
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
index 54c8b16a9b439..70da36cdb89a4 100644
--- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir
+++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
@@ -1,12 +1,7 @@
# RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s
---- |
- @g = external global i32
-
- define void @hwpf1() { ret void }
- define void @hwpf2() { ret void }
-...
---
-# Verify that the tag collision between the loads is resolved.
+# Verify that the tag collision between the loads is resolved for various load opcodes.
+
# CHECK-LABEL: name: hwpf1
# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
# CHECK: LDRWui %[[BASE]], 0
@@ -17,7 +12,7 @@ body: |
bb.0:
liveins: %w0, %x1
- %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+ %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4)
%w2 = LDRWui %x1, 1
%w0 = SUBWri %w0, 1, 0
@@ -28,19 +23,147 @@ body: |
RET_ReallyLR
...
---
-# Verify that the tag collision between the loads is resolved and written back for post increment addressing.
# CHECK-LABEL: name: hwpf2
# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Onev1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Twov1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPQi %[[BASE]]
+# CHECK: LDRWui %x1, 3
+name: hwpf6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 3
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXi %[[BASE]]
+# CHECK: LDRWui %x1, 2
+name: hwpf7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# Verify that the tag collision between the loads is resolved and written back
+# for post increment addressing for various load opcodes.
+
+# CHECK-LABEL: name: hwpfinc1
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
# CHECK: LDRWpost %[[BASE]], 0
# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
# CHECK: LDRWui %x1, 1
-name: hwpf2
+name: hwpfinc1
tracksRegLiveness: true
body: |
bb.0:
liveins: %w0, %x1
- %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+ %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4)
%w2 = LDRWui %x1, 1
%w0 = SUBWri %w0, 1, 0
@@ -50,3 +173,160 @@ body: |
bb.1:
RET_ReallyLR
...
+---
+# CHECK-LABEL: name: hwpfinc2
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 1
+name: hwpfinc2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 132
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 132
+name: hwpfinc3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 132
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Rv1d_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 252
+name: hwpfinc4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 252
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD3Threev2s_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWroX %x17, %x0
+name: hwpfinc5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWroX %x17, %x0, 0, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPDpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name: hwpfinc6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w16 = LDRWui %x17, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name: hwpfinc7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w18 = LDRWui %x17, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# Check that we handle case of strided load with no HW prefetcher tag correctly.
+
+# CHECK-LABEL: name: hwpf_notagbug
+# CHECK-NOT: ORRXrs %xzr
+# CHECK: LDARW %x1
+# CHECK-NOT: ORRXrs %xzr
+# CHECK: LDRWui %x1
+name: hwpf_notagbug
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17
+
+ %w1 = LDARW %x1 :: ("aarch64-strided-access" load 4)
+ %w1 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4)
+ %w17 = LDRWui %x17, 0 :: ("aarch64-strided-access" load 4)
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
index fcc852263b481..3ea6df5be49c5 100644
--- a/test/CodeGen/AArch64/fastcc.ll
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -21,9 +21,11 @@ define fastcc void @func_stack0() {
call fastcc void @func_stack8([8 x i32] undef, i32 42)
; CHECK: bl func_stack8
; CHECK-NOT: sub sp, sp,
+; CHECK-NOT: [sp, #{{[-0-9]+}}]!
+; CHECK-NOT: [sp], #{{[-0-9]+}}
; CHECK-TAIL: bl func_stack8
-; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
@@ -72,10 +74,12 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
call fastcc void @func_stack8([8 x i32] undef, i32 42)
; CHECK: bl func_stack8
; CHECK-NOT: sub sp, sp,
+; CHECK-NOT: [sp, #{{[-0-9]+}}]!
+; CHECK-NOT: [sp], #{{[-0-9]+}}
; CHECK-TAIL: bl func_stack8
-; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
@@ -116,9 +120,11 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
call fastcc void @func_stack8([8 x i32] undef, i32 42)
; CHECK: bl func_stack8
; CHECK-NOT: sub sp, sp,
+; CHECK-NOT: [sp, #{{[-0-9]+}}]!
+; CHECK-NOT: [sp], #{{[-0-9]+}}
; CHECK-TAIL: bl func_stack8
-; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index 975e5ae8b9531..2b98d3215e49a 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -1667,4 +1667,17 @@ entry:
ret void
}
-
+; Check for bug 34674 where invalid add of xzr was being generated.
+; CHECK-LABEL: bug34674:
+; CHECK: // %entry
+; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
+; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0]
+; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
+define i64 @bug34674(<2 x i64>* %p) {
+entry:
+ store <2 x i64> zeroinitializer, <2 x i64>* %p
+ %p2 = bitcast <2 x i64>* %p to i64*
+ %ld = load i64, i64* %p2
+ %add = add i64 %ld, 1
+ ret i64 %add
+}
diff --git a/test/CodeGen/AArch64/thread-pointer.ll b/test/CodeGen/AArch64/thread-pointer.ll
deleted file mode 100644
index 7fea719436f9f..0000000000000
--- a/test/CodeGen/AArch64/thread-pointer.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
-
-@x = thread_local local_unnamed_addr global i32 0, align 4
-@y = thread_local local_unnamed_addr global i32 0, align 4
-
-; Machine LICM should hoist the mrs into the loop preheader.
-; CHECK-LABEL: @test1
-; CHECK: BB#1:
-; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0
-; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x
-; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x
-;
-; CHECK: .LBB0_2:
-; CHECK: ldr w0, [x[[REG2]]]
-; CHECK: bl bar
-; CHECK: subs w[[REG3:[0-9]+]], w{{[0-9]+}}, #1
-; CHECK: b.ne .LBB0_2
-
-define void @test1(i32 %n) local_unnamed_addr {
-entry:
- %cmp3 = icmp sgt i32 %n, 0
- br i1 %cmp3, label %bb1, label %bb2
-
-bb1:
- br label %for.body
-
-for.body:
- %i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ]
- %0 = load i32, i32* @x, align 4
- tail call void @bar(i32 %0) #2
- %inc = add nuw nsw i32 %i.04, 1
- %exitcond = icmp eq i32 %inc, %n
- br i1 %exitcond, label %bb2, label %for.body
-
-bb2:
- ret void
-}
-
-; Machine CSE should combine the the mrs between the load of %x and %y.
-; CHECK-LABEL: @test2
-; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0
-; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0
-; CHECK: ret
-define void @test2(i32 %c) local_unnamed_addr #0 {
-entry:
- %0 = load i32, i32* @x, align 4
- tail call void @bar(i32 %0) #2
- %cmp = icmp eq i32 %c, 0
- br i1 %cmp, label %if.end, label %if.then
-
-if.then:
- %1 = load i32, i32* @y, align 4
- tail call void @bar(i32 %1) #2
- br label %if.end
-
-if.end:
- ret void
-}
-
-declare void @bar(i32) local_unnamed_addr