diff options
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r-- | test/CodeGen/AArch64/arm64-memset-inline.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/AArch64/cmp-frameindex.ll | 19 | ||||
-rw-r--r-- | test/CodeGen/AArch64/falkor-hwpf-fix.mir | 302 | ||||
-rw-r--r-- | test/CodeGen/AArch64/fastcc.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/AArch64/ldst-opt.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/AArch64/thread-pointer.ll | 60 |
6 files changed, 335 insertions, 77 deletions
diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll index 384aaa8541df2..8c872cc615001 100644 --- a/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -12,9 +12,9 @@ entry: define void @t2() nounwind ssp { entry: ; CHECK-LABEL: t2: +; CHECK: stp xzr, xzr, [sp, #16] ; CHECK: strh wzr, [sp, #32] -; CHECK: stp xzr, xzr, [sp, #8] -; CHECK: str xzr, [sp, #24] +; CHECK: str xzr, [sp, #8] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) diff --git a/test/CodeGen/AArch64/cmp-frameindex.ll b/test/CodeGen/AArch64/cmp-frameindex.ll new file mode 100644 index 0000000000000..2d01b76e186c4 --- /dev/null +++ b/test/CodeGen/AArch64/cmp-frameindex.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s + +; CHECK: test_frameindex_cmp: +; CHECK: cmn sp, #{{[0-9]+}} +define void @test_frameindex_cmp() { + %stack = alloca i8 + %stack.int = ptrtoint i8* %stack to i64 + %cmp = icmp ne i64 %stack.int, 0 + br i1 %cmp, label %bb1, label %bb2 + +bb1: + call void @bar() + ret void + +bb2: + ret void +} + +declare void @bar() diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir index 54c8b16a9b439..70da36cdb89a4 100644 --- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir +++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir @@ -1,12 +1,7 @@ # RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s ---- | - @g = external global i32 - - define void @hwpf1() { ret void } - define void @hwpf2() { ret void } -... --- -# Verify that the tag collision between the loads is resolved. +# Verify that the tag collision between the loads is resolved for various load opcodes. 
+ # CHECK-LABEL: name: hwpf1 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 # CHECK: LDRWui %[[BASE]], 0 @@ -17,7 +12,7 @@ body: | bb.0: liveins: %w0, %x1 - %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g) + %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4) %w2 = LDRWui %x1, 1 %w0 = SUBWri %w0, 1, 0 @@ -28,19 +23,147 @@ body: | RET_ReallyLR ... --- -# Verify that the tag collision between the loads is resolved and written back for post increment addressing. # CHECK-LABEL: name: hwpf2 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i64 %q2, 0, %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf3 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i8 %q2, 0, %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf4 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1Onev1d %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... 
+--- +# CHECK-LABEL: name: hwpf5 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1Twov1d %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf6 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPQi %[[BASE]] +# CHECK: LDRWui %x1, 3 +name: hwpf6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 3 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf7 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPXi %[[BASE]] +# CHECK: LDRWui %x1, 2 +name: hwpf7 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 2 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# Verify that the tag collision between the loads is resolved and written back +# for post increment addressing for various load opcodes. + +# CHECK-LABEL: name: hwpfinc1 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 # CHECK: LDRWpost %[[BASE]], 0 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 # CHECK: LDRWui %x1, 1 -name: hwpf2 +name: hwpfinc1 tracksRegLiveness: true body: | bb.0: liveins: %w0, %x1 - %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g) + %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4) %w2 = LDRWui %x1, 1 %w0 = SUBWri %w0, 1, 0 @@ -50,3 +173,160 @@ body: | bb.1: RET_ReallyLR ... 
+--- +# CHECK-LABEL: name: hwpfinc2 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i64_POST %q2, 0, %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x1, 132 +name: hwpfinc2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 132 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc3 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i8_POST %q2, 0, %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x1, 132 +name: hwpfinc3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 132 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc4 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1Rv1d_POST %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x1, 252 +name: hwpfinc4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 252 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... 
+--- +# CHECK-LABEL: name: hwpfinc5 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD3Threev2s_POST %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWroX %x17, %x0 +name: hwpfinc5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17, %q2 + + %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4) + %w0 = LDRWroX %x17, %x0, 0, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc6 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPDpost %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x17, 2 +name: hwpfinc6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17, %q2 + + %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4) + %w16 = LDRWui %x17, 2 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc7 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPXpost %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x17, 2 +name: hwpfinc7 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17, %q2 + + %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4) + %w18 = LDRWui %x17, 2 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# Check that we handle case of strided load with no HW prefetcher tag correctly. 
+ +# CHECK-LABEL: name: hwpf_notagbug +# CHECK-NOT: ORRXrs %xzr +# CHECK: LDARW %x1 +# CHECK-NOT: ORRXrs %xzr +# CHECK: LDRWui %x1 +name: hwpf_notagbug +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17 + + %w1 = LDARW %x1 :: ("aarch64-strided-access" load 4) + %w1 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4) + %w17 = LDRWui %x17, 0 :: ("aarch64-strided-access" load 4) + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index fcc852263b481..3ea6df5be49c5 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -21,9 +21,11 @@ define fastcc void @func_stack0() { call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, +; CHECK-NOT: [sp, #{{[-0-9]+}}]! +; CHECK-NOT: [sp], #{{[-0-9]+}} ; CHECK-TAIL: bl func_stack8 -; CHECK-TAIL: sub sp, sp, #16 +; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) @@ -72,10 +74,12 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, +; CHECK-NOT: [sp, #{{[-0-9]+}}]! +; CHECK-NOT: [sp], #{{[-0-9]+}} ; CHECK-TAIL: bl func_stack8 -; CHECK-TAIL: sub sp, sp, #16 +; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) @@ -116,9 +120,11 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, +; CHECK-NOT: [sp, #{{[-0-9]+}}]! +; CHECK-NOT: [sp], #{{[-0-9]+}} ; CHECK-TAIL: bl func_stack8 -; CHECK-TAIL: sub sp, sp, #16 +; CHECK-TAIL: stp xzr, xzr, [sp, #-16]! 
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 975e5ae8b9531..2b98d3215e49a 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1667,4 +1667,17 @@ entry: ret void } - +; Check for bug 34674 where invalid add of xzr was being generated. +; CHECK-LABEL: bug34674: +; CHECK: // %entry +; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr +; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0] +; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1 +define i64 @bug34674(<2 x i64>* %p) { +entry: + store <2 x i64> zeroinitializer, <2 x i64>* %p + %p2 = bitcast <2 x i64>* %p to i64* + %ld = load i64, i64* %p2 + %add = add i64 %ld, 1 + ret i64 %add +} diff --git a/test/CodeGen/AArch64/thread-pointer.ll b/test/CodeGen/AArch64/thread-pointer.ll deleted file mode 100644 index 7fea719436f9f..0000000000000 --- a/test/CodeGen/AArch64/thread-pointer.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s - -@x = thread_local local_unnamed_addr global i32 0, align 4 -@y = thread_local local_unnamed_addr global i32 0, align 4 - -; Machine LICM should hoist the mrs into the loop preheader. 
-; CHECK-LABEL: @test1 -; CHECK: BB#1: -; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0 -; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x -; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x -; -; CHECK: .LBB0_2: -; CHECK: ldr w0, [x[[REG2]]] -; CHECK: bl bar -; CHECK: subs w[[REG3:[0-9]+]], w{{[0-9]+}}, #1 -; CHECK: b.ne .LBB0_2 - -define void @test1(i32 %n) local_unnamed_addr { -entry: - %cmp3 = icmp sgt i32 %n, 0 - br i1 %cmp3, label %bb1, label %bb2 - -bb1: - br label %for.body - -for.body: - %i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ] - %0 = load i32, i32* @x, align 4 - tail call void @bar(i32 %0) #2 - %inc = add nuw nsw i32 %i.04, 1 - %exitcond = icmp eq i32 %inc, %n - br i1 %exitcond, label %bb2, label %for.body - -bb2: - ret void -} - -; Machine CSE should combine the the mrs between the load of %x and %y. -; CHECK-LABEL: @test2 -; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0 -; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0 -; CHECK: ret -define void @test2(i32 %c) local_unnamed_addr #0 { -entry: - %0 = load i32, i32* @x, align 4 - tail call void @bar(i32 %0) #2 - %cmp = icmp eq i32 %c, 0 - br i1 %cmp, label %if.end, label %if.then - -if.then: - %1 = load i32, i32* @y, align 4 - tail call void @bar(i32 %1) #2 - br label %if.end - -if.end: - ret void -} - -declare void @bar(i32) local_unnamed_addr |