Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r--  test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll   22
-rw-r--r--  test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll      70
-rw-r--r--  test/CodeGen/AArch64/falkor-hwpf-fix.mir              25
-rw-r--r--  test/CodeGen/AArch64/inlineasm-S-constraint.ll        20
-rw-r--r--  test/CodeGen/AArch64/spill-stack-realignment.mir      35
5 files changed, 136 insertions(+), 36 deletions(-)
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index 14beb1ae9c367..1032a6d620bae 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -28,6 +28,28 @@ return: ; preds = %if.then172, %cond.e
ret void
}
+; Avoid an assert/bad codegen in LD1LANEPOST lowering by not forming
+; LD1LANEPOST ISD nodes with a non-constant lane index.
+define <4 x i32> @f2(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2, i32 %idx) {
+ %L0 = load i32, i32* %p
+ %p1 = getelementptr i32, i32* %p, i64 1
+ %L1 = load i32, i32* %p1
+ %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2
+ %vret = insertelement <4 x i32> %v, i32 %L0, i32 %idx
+ store i32 %L1, i32 *%p
+ ret <4 x i32> %vret
+}
+
+; Check that a cycle is avoided during isel between the LD1LANEPOST instruction and the load of %L1.
+define <4 x i32> @f3(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2) {
+ %L0 = load i32, i32* %p
+ %p1 = getelementptr i32, i32* %p, i64 1
+ %L1 = load i32, i32* %p1
+ %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2
+ %vret = insertelement <4 x i32> %v, i32 %L0, i32 %L1
+ ret <4 x i32> %vret
+}
+
; Function Attrs: nounwind readnone
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1
diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index 2fb9d3b2d0309..664078fb7e942 100644
--- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -1,27 +1,31 @@
-; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefix=FALKOR --check-prefix=ALL
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS
-; rdar://11481771
-; rdar://13713797
+declare void @bar(half, float, double, <2 x double>)
+declare void @bari(i32, i32)
+declare void @barl(i64, i64)
+declare void @barf(float, float)
define void @t1() nounwind ssp {
entry:
; ALL-LABEL: t1:
; ALL-NOT: fmov
-; CYCLONE: fmov d0, xzr
-; CYCLONE: fmov d1, xzr
+; ALL: ldr h0,{{.*}}
+; CYCLONE: fmov s1, wzr
; CYCLONE: fmov d2, xzr
-; CYCLONE: fmov d3, xzr
-; KRYO: movi v0.2d, #0000000000000000
-; KRYO: movi v1.2d, #0000000000000000
-; KRYO: movi v2.2d, #0000000000000000
-; KRYO: movi v3.2d, #0000000000000000
-; FALKOR: movi v0.2d, #0000000000000000
-; FALKOR: movi v1.2d, #0000000000000000
-; FALKOR: movi v2.2d, #0000000000000000
-; FALKOR: movi v3.2d, #0000000000000000
- tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind
+; CYCLONE: movi.16b v3, #0
+; CYCLONE-FULLFP16: fmov h0, wzr
+; CYCLONE-FULLFP16: fmov s1, wzr
+; CYCLONE-FULLFP16: fmov d2, xzr
+; CYCLONE-FULLFP16: movi.16b v3, #0
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+ tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
ret void
}
@@ -29,8 +33,8 @@ define void @t2() nounwind ssp {
entry:
; ALL-LABEL: t2:
; ALL-NOT: mov w0, wzr
-; ALL: mov w0, #0
-; ALL: mov w1, #0
+; ALL: mov w{{[0-3]+}}, #0
+; ALL: mov w{{[0-3]+}}, #0
tail call void @bari(i32 0, i32 0) nounwind
ret void
}
@@ -39,8 +43,8 @@ define void @t3() nounwind ssp {
entry:
; ALL-LABEL: t3:
; ALL-NOT: mov x0, xzr
-; ALL: mov x0, #0
-; ALL: mov x1, #0
+; ALL: mov x{{[0-3]+}}, #0
+; ALL: mov x{{[0-3]+}}, #0
tail call void @barl(i64 0, i64 0) nounwind
ret void
}
@@ -48,26 +52,21 @@ entry:
define void @t4() nounwind ssp {
; ALL-LABEL: t4:
; ALL-NOT: fmov
-; CYCLONE: fmov s0, wzr
-; CYCLONE: fmov s1, wzr
-; KRYO: movi v0.2d, #0000000000000000
-; KRYO: movi v1.2d, #0000000000000000
-; FALKOR: movi v0.2d, #0000000000000000
-; FALKOR: movi v1.2d, #0000000000000000
+; CYCLONE: fmov s{{[0-3]+}}, wzr
+; CYCLONE: fmov s{{[0-3]+}}, wzr
+; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
+; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
ret void
}
-declare void @bar(double, double, double, double)
-declare void @bari(i32, i32)
-declare void @barl(i64, i64)
-declare void @barf(float, float)
-
; We used to produce spills+reloads for a Q register with zero cycle zeroing
; enabled.
; ALL-LABEL: foo:
-; ALL-NOT: str {{q[0-9]+}}
-; ALL-NOT: ldr {{q[0-9]+}}
+; ALL-NOT: str q{{[0-9]+}}
+; ALL-NOT: ldr q{{[0-9]+}}
define double @foo(i32 %n) {
entry:
br label %for.body
@@ -90,8 +89,7 @@ for.end:
define <2 x i64> @t6() {
; ALL-LABEL: t6:
; CYCLONE: movi.16b v0, #0
-; KRYO: movi v0.2d, #0000000000000000
-; FALKOR: movi v0.2d, #0000000000000000
+; OTHERS: movi v0.2d, #0000000000000000
ret <2 x i64> zeroinitializer
}
diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
index 38622ae0e49ab..28b19f8776858 100644
--- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir
+++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
@@ -353,3 +353,28 @@ body: |
bb.1:
RET_ReallyLR
...
+---
+# Check that non-base registers are considered live when finding a
+# scratch register by making sure we don't use %x2 for the scratch
+# register for the inserted ORRXrs.
+# CHECK-LABEL: name: hwpf_offreg
+# CHECK: %x3 = ORRXrs %xzr, %x1, 0
+# CHECK: %w10 = LDRWroX %x3, %x2, 0, 0
+name: hwpf_offreg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x2, %x17, %x18
+
+ %w10 = LDRWroX %x1, %x2, 0, 0 :: ("aarch64-strided-access" load 4)
+
+ %x2 = ORRXrs %xzr, %x10, 0
+ %w26 = LDRWroX %x1, %x2, 0, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/AArch64/inlineasm-S-constraint.ll b/test/CodeGen/AArch64/inlineasm-S-constraint.ll
new file mode 100644
index 0000000000000..3fb2a3f32cea3
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-S-constraint.ll
@@ -0,0 +1,20 @@
+;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+@var = global i32 0
+define void @test_inline_constraint_S() {
+; CHECK-LABEL: test_inline_constraint_S:
+ call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
+ call void asm sideeffect "add x0, x0, :lo12:$0", "S"(i32* @var)
+; CHECK: adrp x0, var
+; CHECK: add x0, x0, :lo12:var
+ ret void
+}
+define i32 @test_inline_constraint_S_label(i1 %in) {
+; CHECK-LABEL: test_inline_constraint_S_label:
+ call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
+; CHECK: adr x0, .Ltmp{{[0-9]+}}
+br i1 %in, label %loc, label %loc2
+loc:
+ ret i32 0
+loc2:
+ ret i32 42
+}
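
The following is a hedged C-level sketch (not part of the patch; the symbol and function names are hypothetical) of how the AArch64 "S" inline-asm constraint exercised above is typically reached from C source: the constraint passes the address of a symbol, so the asm template can reference it with adrp/add :lo12: relocations, which is exactly what the CHECK lines verify.

    /* Hypothetical illustration of the "S" constraint; assumes a C compiler
     * targeting AArch64.  The %1 operand prints the bare symbol name. */
    extern int var;

    static inline int *addr_of_var(void) {
        int *p;
        asm("adrp %0, %1\n\t"
            "add  %0, %0, :lo12:%1"
            : "=r"(p)
            : "S"(&var));
        return p;
    }
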
diff --git a/test/CodeGen/AArch64/spill-stack-realignment.mir b/test/CodeGen/AArch64/spill-stack-realignment.mir
new file mode 100644
index 0000000000000..fe85f4b64027b
--- /dev/null
+++ b/test/CodeGen/AArch64/spill-stack-realignment.mir
@@ -0,0 +1,35 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+
+# Ensure references to scavenged stack slots in the CSR area use the
+# FP as a base when the stack pointer must be aligned to something
+# larger than required by the target. This is necessary because the
+# alignment padding area is between the CSR area and the SP, so the SP
+# cannot be used to reference the CSR area.
+name: test
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 64
+# CHECK: stack:
+# CHECK: id: 0, name: '', type: default, offset: -64, size: 4, alignment: 64
+# CHECK-NEXT: stack-id: 0
+# CHECK-NEXT: local-offset: -64
+# CHECK: id: 1, name: '', type: default, offset: -20, size: 4, alignment: 4
+# CHECK-NEXT: stack-id: 0
+# CHECK-NEXT: local-offset: -68
+stack:
+ - { id: 0, size: 4, alignment: 64, local-offset: -64 }
+ - { id: 1, size: 4, alignment: 4, local-offset: -68 }
+
+# CHECK: body:
+# CHECK: %sp = ANDXri killed %{{x[0-9]+}}, 7865
+# CHECK: STRSui %s0, %sp, 0
+# CHECK: STURSi %s0, %fp, -4
+body: |
+ bb.0.entry:
+ liveins: %s0
+
+ STRSui %s0, %stack.0, 0
+ STRSui %s0, %stack.1, 0
+ ; Force preserve a CSR to create a hole in the CSR stack region.
+ %x28 = IMPLICIT_DEF
+ RET_ReallyLR
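
As a rough C analogue (an assumption for illustration only, not part of the patch), the situation this MIR encodes can arise from a function with an over-aligned local: the 64-byte-aligned object forces the stack pointer to be realigned (the ANDXri of %sp checked above), so the slot sitting next to the callee-save area has to be addressed off the frame pointer instead (the STURSi %s0, %fp, -4 check).

    /* Hypothetical source-level trigger: 'big' forces SP realignment to 64
     * bytes, while 'small' lands near the CSR area and becomes FP-relative. */
    void test(float s) {
        _Alignas(64) volatile float big = s;
        volatile float small = s;
    }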