diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-05-21 06:57:07 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-05-21 06:57:07 +0000 |
commit | f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (patch) | |
tree | 311f96478e9fceea407d1f187f9c5cef712f796e /test/CodeGen/AArch64 | |
parent | b6bcb9a905dec7821221e8ceaf1504c1f329815e (diff) |
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r-- | test/CodeGen/AArch64/arm64-tls-dynamics.ll | 128 | ||||
-rw-r--r-- | test/CodeGen/AArch64/arm64-tls-execs.ll | 25 | ||||
-rw-r--r-- | test/CodeGen/AArch64/implicit-sret.ll | 13 | ||||
-rw-r--r-- | test/CodeGen/AArch64/machine-copy-prop.ll | 101 | ||||
-rw-r--r-- | test/CodeGen/AArch64/tailcall-explicit-sret.ll | 106 | ||||
-rw-r--r-- | test/CodeGen/AArch64/tailcall-implicit-sret.ll | 46 |
6 files changed, 359 insertions, 60 deletions
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll index 30ea63b4664a..a89c2c5e6fd5 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll @@ -1,5 +1,7 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s +; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s @general_dynamic_var = external thread_local global i32 @@ -9,22 +11,34 @@ define i32 @test_generaldynamic() { %val = load i32* @general_dynamic_var ret i32 %val - ; FIXME: the adrp instructions are redundant (if harmless). -; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NEXT: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] + + ; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0 ; CHECK: ldr w0, [x[[TP]], x0] +; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: ldr w0, [x[[TP]], x0] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL + } define i32* @test_generaldynamic_addr() { @@ -32,21 +46,25 @@ define i32* @test_generaldynamic_addr() { ret i32* @general_dynamic_var - ; FIXME: the adrp instructions are redundant (if harmless). -; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var +; CHECK-NEXT: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] ; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0 ; CHECK: add x0, [[TP]], x0 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL + } @local_dynamic_var = external thread_local(localdynamic) global i32 @@ -58,54 +76,71 @@ define i32 @test_localdynamic() { ret i32 %val ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]] - +; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var ; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]] + +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] +; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0] -; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC + +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } define i32* @test_localdynamic_addr() { ; CHECK-LABEL: test_localdynamic_addr: - ret i32* @local_dynamic_var - ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]] - -; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0 - -; CHECK: add x0, [[TPIDR]], [[TPREL]] +; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var +; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK: add x0, x[[TPIDR]], x[[TPOFF]] + +; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var +; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var] +; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var +; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var +; CHECK-NOLD-NEXT: blr [[CALLEE]] +; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 +; CHECK-NOLD: add x0, x[[TPIDR]], x0 + ret i32* @local_dynamic_var ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } ; The entire point of the local-dynamic access model is to have a single call to @@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() { %sum = add i32 %val, %val2 ret i32 %sum -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK: adrp x[[DTPREL_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[DTPREL_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE +; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] ; CHECK-NOT: _TLS_MODULE_BASE_ diff --git a/test/CodeGen/AArch64/arm64-tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll index f0130d858896..e6d3d680f417 100644 --- a/test/CodeGen/AArch64/arm64-tls-execs.ll +++ b/test/CodeGen/AArch64/arm64-tls-execs.ll @@ -38,14 +38,13 @@ define i32 @test_local_exec() { ; CHECK-LABEL: test_local_exec: %val = load i32* @local_exec_var -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92] -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0 -; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var +; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var +; CHECK: ldr w0, [x[[R3]]] +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC ret i32 %val } @@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() { ; CHECK-LABEL: test_local_exec_addr: ret i32* @local_exec_var -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0 -; CHECK: add x0, [[TP]], [[TP_OFFSET]] +; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var +; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var +; CHECK: ret -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12 +; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC } diff --git a/test/CodeGen/AArch64/implicit-sret.ll b/test/CodeGen/AArch64/implicit-sret.ll new file mode 100644 index 000000000000..264d519f36f8 --- /dev/null +++ b/test/CodeGen/AArch64/implicit-sret.ll @@ -0,0 +1,13 @@ +; RUN: llc %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s +; +; Handle implicit sret arguments that are generated on-the-fly during lowering. +; <rdar://19792160> Null pointer assertion in AArch64TargetLowering + +; CHECK-LABEL: big_retval +; ... str or stp for the first 1024 bits +; CHECK: strb wzr, [x8, #128] +; CHECK: ret +define i1032 @big_retval() { +entry: + ret i1032 0 +} diff --git a/test/CodeGen/AArch64/machine-copy-prop.ll b/test/CodeGen/AArch64/machine-copy-prop.ll new file mode 100644 index 000000000000..92d877d40f59 --- /dev/null +++ b/test/CodeGen/AArch64/machine-copy-prop.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s + +; This file check a bug in MachineCopyPropagation pass. The last COPY will be +; incorrectly removed if the machine instructions are as follows: +; %Q5_Q6<def> = COPY %Q2_Q3 +; %D5<def> = +; %D3<def> = +; %D3<def> = COPY %D6 +; This is caused by a bug in function SourceNoLongerAvailable(), which fails to +; remove the relationship of D6 and "%Q5_Q6<def> = COPY %Q2_Q3". + +@failed = internal unnamed_addr global i1 false + +; CHECK-LABEL: foo: +; CHECK: ld2 +; CHECK-NOT: // kill: D{{[0-9]+}}<def> D{{[0-9]+}}<kill> +define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) { +entry: + %val0 = alloca [2 x i64], align 8 + %val1 = alloca <2 x i64>, align 16 + %vmull = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> <i32 -1, i32 -1>, <2 x i32> %shuffle251) + %vgetq_lane = extractelement <2 x i64> %vmull, i32 0 + %cmp = icmp eq i64 %vgetq_lane, 1 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + store i1 true, i1* @failed, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @f2() + %sqdmull = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> <i16 1, i16 0, i16 0, i16 0>, <4 x i16> <i16 2, i16 0, i16 0, i16 0>) + %sqadd = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> zeroinitializer, <4 x i32> %sqdmull) + %shuffle = shufflevector <4 x i32> %sqadd, <4 x i32> undef, <2 x i32> zeroinitializer + %0 = mul <2 x i32> %shuffle, <i32 -1, i32 0> + %sub = add <2 x i32> %0, <i32 1, i32 0> + %sext = sext <2 x i32> %sub to <2 x i64> + %vset_lane603 = shufflevector <2 x i64> %sext, <2 x i64> undef, <1 x i32> zeroinitializer + %t1 = bitcast [2 x i64]* %val0 to i8* + call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i64 1, i8* %t1) + call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> <i64 4096>, <1 x i64> <i64 -1>, i64 0, i8* %t2) + %vld2_lane = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> <i64 11>, <1 x i64> <i64 11>, i64 0, i8* %t2) + %vld2_lane.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0 + %vld2_lane.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1 + %vld2_lane1 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> %vld2_lane.0.extract, <1 x i64> %vld2_lane.1.extract, i64 0, i8* %t1) + %vld2_lane1.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 0 + %vld2_lane1.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 1 + %t3 = bitcast <2 x i64>* %val1 to i8* + call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> %vld2_lane1.0.extract, <1 x i64> %vld2_lane1.1.extract, i8* %t3) + %t4 = load <2 x i64>* %val1, align 16 + %vsubhn = sub <2 x i64> <i64 11, i64 0>, %t4 + %vsubhn1 = lshr <2 x i64> %vsubhn, <i64 32, i64 32> + %vsubhn2 = trunc <2 x i64> %vsubhn1 to <2 x i32> + %neg = xor <2 x i32> %vsubhn2, <i32 -1, i32 -1> + %sqadd1 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 -1>, <1 x i64> <i64 1>) + %sqadd2 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %vset_lane603, <1 x i64> %sqadd1) + %sqadd3 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 1>, <1 x i64> %sqadd2) + %shuffle.i = shufflevector <2 x i32> <i32 undef, i32 0>, <2 x i32> %vrsubhn_v2.i1364, <2 x i32> <i32 1, i32 3> + %cmp.i = icmp uge <2 x i32> %shuffle.i, %neg + %sext.i = sext <2 x i1> %cmp.i to <2 x i32> + %vpadal = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %sext.i) + %t5 = sub <1 x i64> %vpadal, %sqadd3 + %vget_lane1 = extractelement <1 x i64> %t5, i32 0 + %cmp2 = icmp eq i64 %vget_lane1, 15 + br i1 %cmp2, label %if.end2, label %if.then2 + +if.then2: ; preds = %if.end + store i1 true, i1* @failed, align 1 + br label %if.end2 + +if.end2: ; preds = %if.then682, %if.end + call void @f2() + %vext = shufflevector <8 x i8> <i8 undef, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vtbl1.i, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> + %t6 = bitcast <8 x i8> %vext to <2 x i32> + call void @f0(<2 x i32> %t6) + ret void +} + +declare void @f0(<2 x i32>) + +declare <8 x i8> @f1() + +declare void @f2() + +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) + +declare void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64>, <2 x i64>, i64, i8* nocapture) + +declare void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8* nocapture) + +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8*) + +declare void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64>, <1 x i64>, i8* nocapture) + +declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) + +declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) + +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll new file mode 100644 index 000000000000..f4ad65584095 --- /dev/null +++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll @@ -0,0 +1,106 @@ +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Check that we don't try to tail-call with a non-forwarded sret parameter. +declare void @test_explicit_sret(i1024* sret) #0 + +; This is the only OK case, where we forward the explicit sret pointer. + +; CHECK-LABEL: _test_tailcall_explicit_sret: +; CHECK-NEXT: b _test_explicit_sret +define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 { + tail call void @test_explicit_sret(i1024* %arg) + ret void +} + +; CHECK-LABEL: _test_call_explicit_sret: +; CHECK-NOT: mov x8 +; CHECK: bl _test_explicit_sret +; CHECK: ret +define void @test_call_explicit_sret(i1024* sret %arg) #0 { + call void @test_explicit_sret(i1024* %arg) + ret void +} + +; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused: +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_explicit_sret +; CHECK: ret +define void @test_tailcall_explicit_sret_alloca_unused() #0 { + %l = alloca i1024, align 8 + tail call void @test_explicit_sret(i1024* %l) + ret void +} + +; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers: +; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0] +; CHECK: str [[PTRLOAD1]], [sp] +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_explicit_sret +; CHECK: ret +define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 { + %l = alloca i1024, align 8 + %r = load i1024* %ptr, align 8 + store i1024 %r, i1024* %l, align 8 + tail call void @test_explicit_sret(i1024* %l) + ret void +} + +; This is too conservative, but doesn't really happen in practice. + +; CHECK-LABEL: _test_tailcall_explicit_sret_gep: +; CHECK: add x8, x0, #128 +; CHECK-NEXT: bl _test_explicit_sret +; CHECK: ret +define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 { + %ptr2 = getelementptr i1024* %ptr, i32 1 + tail call void @test_explicit_sret(i1024* %ptr2) + ret void +} + +; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_explicit_sret +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 { + %l = alloca i1024, align 8 + tail call void @test_explicit_sret(i1024* %l) + %r = load i1024* %l, align 8 + ret i1024 %r +} + +; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg: +; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0 +; CHECK: mov x0, sp +; CHECK-NEXT: blr [[FPTR]] +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 { + %l = alloca i1024, align 8 + tail call void %f(i1024* %l) + %r = load i1024* %l, align 8 + store i1024 %r, i1024* %arg, align 8 + ret void +} + +; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: blr x0 +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 { + %ret = tail call i1024 %f() + store i1024 %ret, i1024* %arg, align 8 + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll new file mode 100644 index 000000000000..5d6805998d22 --- /dev/null +++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Check that we don't try to tail-call with an sret-demoted return. + +declare i1024 @test_sret() #0 + +; CHECK-LABEL: _test_call_sret: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_sret +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_call_sret() #0 { + %a = call i1024 @test_sret() + ret i1024 %a +} + +; CHECK-LABEL: _test_tailcall_sret: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: bl _test_sret +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_tailcall_sret() #0 { + %a = tail call i1024 @test_sret() + ret i1024 %a +} + +; CHECK-LABEL: _test_indirect_tailcall_sret: +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: mov x8, sp +; CHECK-NEXT: blr x0 +; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp] +; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]] +; CHECK: ret +define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 { + %a = tail call i1024 %f() + ret i1024 %a +} + +attributes #0 = { nounwind } |