summaryrefslogtreecommitdiff
path: root/test/CodeGen/AArch64
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-05-21 06:57:07 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-05-21 06:57:07 +0000
commitf03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (patch)
tree311f96478e9fceea407d1f187f9c5cef712f796e /test/CodeGen/AArch64
parentb6bcb9a905dec7821221e8ceaf1504c1f329815e (diff)
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamics.ll128
-rw-r--r--test/CodeGen/AArch64/arm64-tls-execs.ll25
-rw-r--r--test/CodeGen/AArch64/implicit-sret.ll13
-rw-r--r--test/CodeGen/AArch64/machine-copy-prop.ll101
-rw-r--r--test/CodeGen/AArch64/tailcall-explicit-sret.ll106
-rw-r--r--test/CodeGen/AArch64/tailcall-implicit-sret.ll46
6 files changed, 359 insertions, 60 deletions
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index 30ea63b4664a..a89c2c5e6fd5 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s
@general_dynamic_var = external thread_local global i32
@@ -9,22 +11,34 @@ define i32 @test_generaldynamic() {
%val = load i32* @general_dynamic_var
ret i32 %val
- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+
+
; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
; CHECK: ldr w0, [x[[TP]], x0]
+; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TP]], x0]
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
define i32* @test_generaldynamic_addr() {
@@ -32,21 +46,25 @@ define i32* @test_generaldynamic_addr() {
ret i32* @general_dynamic_var
- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]
; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
; CHECK: add x0, [[TP]], x0
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
@local_dynamic_var = external thread_local(localdynamic) global i32
@@ -58,54 +76,71 @@ define i32 @test_localdynamic() {
ret i32 %val
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
-
-; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]]
-
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]]
+
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0]
-; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
}
define i32* @test_localdynamic_addr() {
; CHECK-LABEL: test_localdynamic_addr:
- ret i32* @local_dynamic_var
-
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
-
-; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]]
-
-; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0
-
-; CHECK: add x0, [[TPIDR]], [[TPREL]]
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
+; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: add x0, x[[TPIDR]], x[[TPOFF]]
+
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: add x0, x[[TPIDR]], x0
+ ret i32* @local_dynamic_var
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
}
; The entire point of the local-dynamic access model is to have a single call to
@@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() {
%sum = add i32 %val, %val2
ret i32 %sum
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK: adrp x[[DTPREL_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[DTPREL_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
; CHECK-NOT: _TLS_MODULE_BASE_
diff --git a/test/CodeGen/AArch64/arm64-tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll
index f0130d858896..e6d3d680f417 100644
--- a/test/CodeGen/AArch64/arm64-tls-execs.ll
+++ b/test/CodeGen/AArch64/arm64-tls-execs.ll
@@ -38,14 +38,13 @@ define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
%val = load i32* @local_exec_var
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92]
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
-; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
-
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ldr w0, [x[[R3]]]
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
ret i32 %val
}
@@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() {
; CHECK-LABEL: test_local_exec_addr:
ret i32* @local_exec_var
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
-; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ret
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
}
diff --git a/test/CodeGen/AArch64/implicit-sret.ll b/test/CodeGen/AArch64/implicit-sret.ll
new file mode 100644
index 000000000000..264d519f36f8
--- /dev/null
+++ b/test/CodeGen/AArch64/implicit-sret.ll
@@ -0,0 +1,13 @@
+; RUN: llc %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s
+;
+; Handle implicit sret arguments that are generated on-the-fly during lowering.
+; <rdar://19792160> Null pointer assertion in AArch64TargetLowering
+
+; CHECK-LABEL: big_retval
+; ... str or stp for the first 1024 bits
+; CHECK: strb wzr, [x8, #128]
+; CHECK: ret
+define i1032 @big_retval() {
+entry:
+ ret i1032 0
+}
diff --git a/test/CodeGen/AArch64/machine-copy-prop.ll b/test/CodeGen/AArch64/machine-copy-prop.ll
new file mode 100644
index 000000000000..92d877d40f59
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-copy-prop.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
+
+; This file check a bug in MachineCopyPropagation pass. The last COPY will be
+; incorrectly removed if the machine instructions are as follows:
+; %Q5_Q6<def> = COPY %Q2_Q3
+; %D5<def> =
+; %D3<def> =
+; %D3<def> = COPY %D6
+; This is caused by a bug in function SourceNoLongerAvailable(), which fails to
+; remove the relationship of D6 and "%Q5_Q6<def> = COPY %Q2_Q3".
+
+@failed = internal unnamed_addr global i1 false
+
+; CHECK-LABEL: foo:
+; CHECK: ld2
+; CHECK-NOT: // kill: D{{[0-9]+}}<def> D{{[0-9]+}}<kill>
+define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) {
+entry:
+ %val0 = alloca [2 x i64], align 8
+ %val1 = alloca <2 x i64>, align 16
+ %vmull = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> <i32 -1, i32 -1>, <2 x i32> %shuffle251)
+ %vgetq_lane = extractelement <2 x i64> %vmull, i32 0
+ %cmp = icmp eq i64 %vgetq_lane, 1
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i1 true, i1* @failed, align 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @f2()
+ %sqdmull = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> <i16 1, i16 0, i16 0, i16 0>, <4 x i16> <i16 2, i16 0, i16 0, i16 0>)
+ %sqadd = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> zeroinitializer, <4 x i32> %sqdmull)
+ %shuffle = shufflevector <4 x i32> %sqadd, <4 x i32> undef, <2 x i32> zeroinitializer
+ %0 = mul <2 x i32> %shuffle, <i32 -1, i32 0>
+ %sub = add <2 x i32> %0, <i32 1, i32 0>
+ %sext = sext <2 x i32> %sub to <2 x i64>
+ %vset_lane603 = shufflevector <2 x i64> %sext, <2 x i64> undef, <1 x i32> zeroinitializer
+ %t1 = bitcast [2 x i64]* %val0 to i8*
+ call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i64 1, i8* %t1)
+ call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> <i64 4096>, <1 x i64> <i64 -1>, i64 0, i8* %t2)
+ %vld2_lane = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> <i64 11>, <1 x i64> <i64 11>, i64 0, i8* %t2)
+ %vld2_lane.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0
+ %vld2_lane.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1
+ %vld2_lane1 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> %vld2_lane.0.extract, <1 x i64> %vld2_lane.1.extract, i64 0, i8* %t1)
+ %vld2_lane1.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 0
+ %vld2_lane1.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 1
+ %t3 = bitcast <2 x i64>* %val1 to i8*
+ call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> %vld2_lane1.0.extract, <1 x i64> %vld2_lane1.1.extract, i8* %t3)
+ %t4 = load <2 x i64>* %val1, align 16
+ %vsubhn = sub <2 x i64> <i64 11, i64 0>, %t4
+ %vsubhn1 = lshr <2 x i64> %vsubhn, <i64 32, i64 32>
+ %vsubhn2 = trunc <2 x i64> %vsubhn1 to <2 x i32>
+ %neg = xor <2 x i32> %vsubhn2, <i32 -1, i32 -1>
+ %sqadd1 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 -1>, <1 x i64> <i64 1>)
+ %sqadd2 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %vset_lane603, <1 x i64> %sqadd1)
+ %sqadd3 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> <i64 1>, <1 x i64> %sqadd2)
+ %shuffle.i = shufflevector <2 x i32> <i32 undef, i32 0>, <2 x i32> %vrsubhn_v2.i1364, <2 x i32> <i32 1, i32 3>
+ %cmp.i = icmp uge <2 x i32> %shuffle.i, %neg
+ %sext.i = sext <2 x i1> %cmp.i to <2 x i32>
+ %vpadal = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %sext.i)
+ %t5 = sub <1 x i64> %vpadal, %sqadd3
+ %vget_lane1 = extractelement <1 x i64> %t5, i32 0
+ %cmp2 = icmp eq i64 %vget_lane1, 15
+ br i1 %cmp2, label %if.end2, label %if.then2
+
+if.then2: ; preds = %if.end
+ store i1 true, i1* @failed, align 1
+ br label %if.end2
+
+if.end2: ; preds = %if.then682, %if.end
+ call void @f2()
+ %vext = shufflevector <8 x i8> <i8 undef, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vtbl1.i, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %t6 = bitcast <8 x i8> %vext to <2 x i32>
+ call void @f0(<2 x i32> %t6)
+ ret void
+}
+
+declare void @f0(<2 x i32>)
+
+declare <8 x i8> @f1()
+
+declare void @f2()
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64>, <2 x i64>, i64, i8* nocapture)
+
+declare void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8* nocapture)
+
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8*)
+
+declare void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64>, <1 x i64>, i8* nocapture)
+
+declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>)
+
+declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
new file mode 100644
index 000000000000..f4ad65584095
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check that we don't try to tail-call with a non-forwarded sret parameter.
+declare void @test_explicit_sret(i1024* sret) #0
+
+; This is the only OK case, where we forward the explicit sret pointer.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret:
+; CHECK-NEXT: b _test_explicit_sret
+define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 {
+ tail call void @test_explicit_sret(i1024* %arg)
+ ret void
+}
+
+; CHECK-LABEL: _test_call_explicit_sret:
+; CHECK-NOT: mov x8
+; CHECK: bl _test_explicit_sret
+; CHECK: ret
+define void @test_call_explicit_sret(i1024* sret %arg) #0 {
+ call void @test_explicit_sret(i1024* %arg)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused:
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_unused() #0 {
+ %l = alloca i1024, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
+; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
+; CHECK: str [[PTRLOAD1]], [sp]
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
+ %l = alloca i1024, align 8
+ %r = load i1024* %ptr, align 8
+ store i1024 %r, i1024* %l, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ ret void
+}
+
+; This is too conservative, but doesn't really happen in practice.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_gep:
+; CHECK: add x8, x0, #128
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
+ %ptr2 = getelementptr i1024* %ptr, i32 1
+ tail call void @test_explicit_sret(i1024* %ptr2)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
+ %l = alloca i1024, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ %r = load i1024* %l, align 8
+ ret i1024 %r
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg:
+; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0
+; CHECK: mov x0, sp
+; CHECK-NEXT: blr [[FPTR]]
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
+ %l = alloca i1024, align 8
+ tail call void %f(i1024* %l)
+ %r = load i1024* %l, align 8
+ store i1024 %r, i1024* %arg, align 8
+ ret void
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
+ %ret = tail call i1024 %f()
+ store i1024 %ret, i1024* %arg, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
new file mode 100644
index 000000000000..5d6805998d22
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check that we don't try to tail-call with an sret-demoted return.
+
+declare i1024 @test_sret() #0
+
+; CHECK-LABEL: _test_call_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_call_sret() #0 {
+ %a = call i1024 @test_sret()
+ ret i1024 %a
+}
+
+; CHECK-LABEL: _test_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_sret() #0 {
+ %a = tail call i1024 @test_sret()
+ ret i1024 %a
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
+ %a = tail call i1024 %f()
+ ret i1024 %a
+}
+
+attributes #0 = { nounwind }