vendor/llvm/llvm-trunk-r290819

author: Dimitry Andric <dim@FreeBSD.org> 2017-01-02 19:17:04 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-01-02 19:17:04 +0000
commit: b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch)
tree: 98b8f811c7aff2547cab8642daf372d6c59502fb /test/CodeGen/AArch64
parent: 6421cca32f69ac849537a3cff78c352195e99f1b (diff)
336 files changed, 11732 insertions, 1338 deletions
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
new file mode 100644
index 000000000000..95b2ea2b4ffc
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+; CHECK-LABEL: name: args_i32
+; CHECK: %[[ARG0:[0-9]+]](s32) = COPY %w0
+; CHECK: %{{[0-9]+}}(s32) = COPY %w1
+; CHECK: %{{[0-9]+}}(s32) = COPY %w2
+; CHECK: %{{[0-9]+}}(s32) = COPY %w3
+; CHECK: %{{[0-9]+}}(s32) = COPY %w4
+; CHECK: %{{[0-9]+}}(s32) = COPY %w5
+; CHECK: %{{[0-9]+}}(s32) = COPY %w6
+; CHECK: %{{[0-9]+}}(s32) = COPY %w7
+; CHECK: %w0 = COPY %[[ARG0]]
+
+define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3,
+                     i32 %w4, i32 %w5, i32 %w6, i32 %w7) {
+  ret i32 %w0
+}
+
+; CHECK-LABEL: name: args_i64
+; CHECK: %[[ARG0:[0-9]+]](s64) = COPY %x0
+; CHECK: %{{[0-9]+}}(s64) = COPY %x1
+; CHECK: %{{[0-9]+}}(s64) = COPY %x2
+; CHECK: %{{[0-9]+}}(s64) = COPY %x3
+; CHECK: %{{[0-9]+}}(s64) = COPY %x4
+; CHECK: %{{[0-9]+}}(s64) = COPY %x5
+; CHECK: %{{[0-9]+}}(s64) = COPY %x6
+; CHECK: %{{[0-9]+}}(s64) = COPY %x7
+; CHECK: %x0 = COPY %[[ARG0]]
+define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3,
+                     i64 %x4, i64 %x5, i64 %x6, i64 %x7) {
+  ret i64 %x0
+}
+
+
+; CHECK-LABEL: name: args_ptrs
+; CHECK: %[[ARG0:[0-9]+]](p0) = COPY %x0
+; CHECK: %{{[0-9]+}}(p0) = COPY %x1
+; CHECK: %{{[0-9]+}}(p0) = COPY %x2
+; CHECK: %{{[0-9]+}}(p0) = COPY %x3
+; CHECK: %{{[0-9]+}}(p0) = COPY %x4
+; CHECK: %{{[0-9]+}}(p0) = COPY %x5
+; CHECK: %{{[0-9]+}}(p0) = COPY %x6
+; CHECK: %{{[0-9]+}}(p0) = COPY %x7
+; CHECK: %x0 = COPY %[[ARG0]]
+define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3,
+                      [3 x float]* %x4, double* %x5, i8* %x6, i8* %x7) {
+  ret i8* %x0
+}
+
+; CHECK-LABEL: name: args_arr
+; CHECK: %[[ARG0:[0-9]+]](s64) = COPY %d0
+; CHECK: %d0 = COPY %[[ARG0]]
+define [1 x double] @args_arr([1 x double] %d0) {
+  ret [1 x double] %d0
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
new file mode 100644
index 000000000000..8d1dbc246e6a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -0,0 +1,117 @@
+; RUN: not llc -O0 -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
+; RUN: llc -O0 -global-isel -global-isel-abort=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=FALLBACK
+; RUN: llc -O0 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o %t.out 2> %t.err
+; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out
+; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-ERR < %t.err
+; This file checks that the fallback path to selection dag works.
+; The test is fragile in the sense that it must be updated to expose
+; something that fails with global-isel.
+; When we cannot produce a test case anymore, that means we can remove
+; the fallback path.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--"
+
+; We use __fixunstfti as the common denominator for __fixunstfti on Linux and
+; ___fixunstfti on iOS
+; ERROR: Unable to lower arguments
+; FALLBACK: ldr q0,
+; FALLBACK-NEXT: bl __fixunstfti
+;
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ABIi128
+; FALLBACK-WITH-REPORT-OUT-LABEL: ABIi128:
+; FALLBACK-WITH-REPORT-OUT: ldr q0,
+; FALLBACK-WITH-REPORT-OUT-NEXT: bl __fixunstfti
+define i128 @ABIi128(i128 %arg1) {
+  %farg1 =       bitcast i128 %arg1 to fp128
+  %res = fptoui fp128 %farg1 to i128
+  ret i128 %res
+}
+
+; It happens that we don't handle ConstantArray instances yet during
+; translation. Any other constant would be fine too.
+
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for constant
+; FALLBACK-WITH-REPORT-OUT-LABEL: constant:
+; FALLBACK-WITH-REPORT-OUT: fmov d0, #1.0
+define [1 x double] @constant() {
+  ret [1 x double] [double 1.0]
+}
+
+  ; The key problem here is that we may fail to create an MBB referenced by a
+  ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
+  ; happen.
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis
+; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis:
+define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) {
+  br i1 %tst, label %true, label %false
+
+end:
+  %res = phi i32 [%val, %true], [42, %false]
+  ret i32 %res
+
+true:
+  store atomic i32 42, i32* %addr seq_cst, align 4
+  br label %end
+
+false:
+  br label %end
+
+}
+
+  ; General legalizer inability to handle types whose size wasn't a power of 2.
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type
+; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type:
+define void @odd_type(i42* %addr) {
+  %val42 = load i42, i42* %addr
+  ret void
+}
+
+  ; RegBankSelect crashed when given invalid mappings, and AArch64's
+  ; implementation produced valid-but-nonsense mappings for G_SEQUENCE.
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for sequence_mapping
+; FALLBACK-WITH-REPORT-OUT-LABEL: sequence_mapping:
+define void @sequence_mapping([2 x i64] %in) {
+  ret void
+}
+
+  ; Legalizer was asserting when it encountered an unexpected default action.
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for legal_default
+; FALLBACK-WITH-REPORT-OUT-LABEL: legal_default:
+define void @legal_default(i64 %in) {
+  insertvalue [2 x i64] undef, i64 %in, 0
+  ret void
+}
+
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for debug_insts
+; FALLBACK-WITH-REPORT-OUT-LABEL: debug_insts:
+define void @debug_insts(i32 %in) #0 !dbg !7 {
+entry:
+  %in.addr = alloca i32, align 4
+  store i32 %in, i32* %in.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %in.addr, metadata !11, metadata !12), !dbg !13
+  ret void, !dbg !14
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 289075) (llvm/trunk 289080)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "tmp.c", directory: "/Users/tim/llvm/build")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"PIC Level", i32 2}
+!6 = !{!"clang version 4.0.0 (trunk 289075) (llvm/trunk 289080)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "in", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!12 = !DIExpression()
+!13 = !DILocation(line: 1, column: 14, scope: !7)
+!14 = !DILocation(line: 2, column: 1, scope: !7)
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir
new file mode 100644
index 000000000000..22210e49bd77
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-instructionselect.mir
@@ -0,0 +1,2979 @@
+# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=IOS
+# RUN: llc -O0 -mtriple=aarch64-linux-gnu -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-DEFAULT
+# RUN: llc -O0 -mtriple=aarch64-linux-gnu -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-PIC
+
+# Test the instruction selector.
+# As we support more instructions, we need to split this up.
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+  define void @add_s8_gpr() { ret void }
+  define void @add_s16_gpr() { ret void }
+  define void @add_s32_gpr() { ret void }
+  define void @add_s64_gpr() { ret void }
+
+  define void @sub_s8_gpr() { ret void }
+  define void @sub_s16_gpr() { ret void }
+  define void @sub_s32_gpr() { ret void }
+  define void @sub_s64_gpr() { ret void }
+
+  define void @or_s1_gpr() { ret void }
+  define void @or_s16_gpr() { ret void }
+  define void @or_s32_gpr() { ret void }
+  define void @or_s64_gpr() { ret void }
+  define void @or_v2s32_fpr() { ret void }
+
+  define void @xor_s8_gpr() { ret void }
+  define void @xor_s16_gpr() { ret void }
+  define void @xor_s32_gpr() { ret void }
+  define void @xor_s64_gpr() { ret void }
+
+  define void @and_s8_gpr() { ret void }
+  define void @and_s16_gpr() { ret void }
+  define void @and_s32_gpr() { ret void }
+  define void @and_s64_gpr() { ret void }
+
+  define void @shl_s8_gpr() { ret void }
+  define void @shl_s16_gpr() { ret void }
+  define void @shl_s32_gpr() { ret void }
+  define void @shl_s64_gpr() { ret void }
+
+  define void @lshr_s32_gpr() { ret void }
+  define void @lshr_s64_gpr() { ret void }
+
+  define void @ashr_s32_gpr() { ret void }
+  define void @ashr_s64_gpr() { ret void }
+
+  define void @mul_s8_gpr() { ret void }
+  define void @mul_s16_gpr() { ret void }
+  define void @mul_s32_gpr() { ret void }
+  define void @mul_s64_gpr() { ret void }
+
+  define void @sdiv_s32_gpr() { ret void }
+  define void @sdiv_s64_gpr() { ret void }
+
+  define void @udiv_s32_gpr() { ret void }
+  define void @udiv_s64_gpr() { ret void }
+
+  define void @fadd_s32_gpr() { ret void }
+  define void @fadd_s64_gpr() { ret void }
+
+  define void @fsub_s32_gpr() { ret void }
+  define void @fsub_s64_gpr() { ret void }
+
+  define void @fmul_s32_gpr() { ret void }
+  define void @fmul_s64_gpr() { ret void }
+
+  define void @fdiv_s32_gpr() { ret void }
+  define void @fdiv_s64_gpr() { ret void }
+
+  define void @sitofp_s32_s32_fpr() { ret void }
+  define void @sitofp_s32_s64_fpr() { ret void }
+  define void @sitofp_s64_s32_fpr() { ret void }
+  define void @sitofp_s64_s64_fpr() { ret void }
+
+  define void @uitofp_s32_s32_fpr() { ret void }
+  define void @uitofp_s32_s64_fpr() { ret void }
+  define void @uitofp_s64_s32_fpr() { ret void }
+  define void @uitofp_s64_s64_fpr() { ret void }
+
+  define void @fptosi_s32_s32_gpr() { ret void }
+  define void @fptosi_s32_s64_gpr() { ret void }
+  define void @fptosi_s64_s32_gpr() { ret void }
+  define void @fptosi_s64_s64_gpr() { ret void }
+
+  define void @fptoui_s32_s32_gpr() { ret void }
+  define void @fptoui_s32_s64_gpr() { ret void }
+  define void @fptoui_s64_s32_gpr() { ret void }
+  define void @fptoui_s64_s64_gpr() { ret void }
+
+  define void @fptrunc() { ret void }
+  define void @fpext() { ret void }
+
+  define void @unconditional_br() { ret void }
+  define void @conditional_br() { ret void }
+
+  define void @load_s64_gpr(i64* %addr) { ret void }
+  define void @load_s32_gpr(i32* %addr) { ret void }
+  define void @load_s16_gpr(i16* %addr) { ret void }
+  define void @load_s8_gpr(i8* %addr) { ret void }
+  define void @load_s64_fpr(i64* %addr) { ret void }
+  define void @load_s32_fpr(i32* %addr) { ret void }
+  define void @load_s16_fpr(i16* %addr) { ret void }
+  define void @load_s8_fpr(i8* %addr) { ret void }
+
+  define void @store_s64_gpr(i64* %addr) { ret void }
+  define void @store_s32_gpr(i32* %addr) { ret void }
+  define void @store_s16_gpr(i16* %addr) { ret void }
+  define void @store_s8_gpr(i8* %addr) { ret void }
+  define void @store_s64_fpr(i64* %addr) { ret void }
+  define void @store_s32_fpr(i32* %addr) { ret void }
+
+  define void @frame_index() {
+    %ptr0 = alloca i64
+    ret void
+  }
+
+  define void @selected_property() { ret void }
+
+  define i32 @const_s32() { ret i32 42 }
+  define i64 @const_s64() { ret i64 1234567890123 }
+
+  define i32 @fconst_s32() { ret i32 42 }
+  define i64 @fconst_s64() { ret i64 1234567890123 }
+
+  define i8* @gep(i8* %in) { ret i8* undef }
+
+  @var_local = global i8 0
+  define i8* @global_local() { ret i8* undef }
+
+  @var_got = external global i8
+  define i8* @global_got() { ret i8* undef }
+
+  define void @trunc() { ret void }
+
+  define void @anyext_gpr() { ret void }
+  define void @zext_gpr() { ret void }
+  define void @sext_gpr() { ret void }
+
+  define void @casts() { ret void }
+
+  define void @bitcast_s32_gpr() { ret void }
+  define void @bitcast_s32_fpr() { ret void }
+  define void @bitcast_s32_gpr_fpr() { ret void }
+  define void @bitcast_s32_fpr_gpr() { ret void }
+  define void @bitcast_s64_gpr() { ret void }
+  define void @bitcast_s64_fpr() { ret void }
+  define void @bitcast_s64_gpr_fpr() { ret void }
+  define void @bitcast_s64_fpr_gpr() { ret void }
+
+  define void @icmp() { ret void }
+  define void @fcmp() { ret void }
+
+  define void @phi() { ret void }
+
+  define void @select() { ret void }
+...
+
+---
+# CHECK-LABEL: name: add_s8_gpr
+name:            add_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ADDWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s8) = COPY %w0
+    %1(s8) = COPY %w1
+    %2(s8) = G_ADD %0, %1
+...
+
+---
+# CHECK-LABEL: name: add_s16_gpr
+name:            add_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ADDWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_ADD %0, %1
+...
+
+---
+# Check that we select a 32-bit GPR G_ADD into ADDWrr on GPR32.
+# Also check that we constrain the register class of the COPY to GPR32.
+# CHECK-LABEL: name: add_s32_gpr
+name:            add_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ADDWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_ADD %0, %1
+...
+
+---
+# Same as add_s32_gpr, for 64-bit operations.
+# CHECK-LABEL: name: add_s64_gpr
+name:            add_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = ADDXrr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_ADD %0, %1
+...
+
+---
+# CHECK-LABEL: name: sub_s8_gpr
+name:            sub_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = SUBWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s8) = COPY %w0
+    %1(s8) = COPY %w1
+    %2(s8) = G_SUB %0, %1
+...
+
+---
+# CHECK-LABEL: name: sub_s16_gpr
+name:            sub_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = SUBWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_SUB %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_SUB operations.
+# CHECK-LABEL: name: sub_s32_gpr
+name:            sub_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = SUBWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_SUB %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_SUB operations.
+# CHECK-LABEL: name: sub_s64_gpr
+name:            sub_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = SUBXrr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_SUB %0, %1
+...
+
+---
+# CHECK-LABEL: name: or_s1_gpr
+name:            or_s1_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ORRWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s1) = COPY %w0
+    %1(s1) = COPY %w1
+    %2(s1) = G_OR %0, %1
+...
+
+---
+# CHECK-LABEL: name: or_s16_gpr
+name:            or_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ORRWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_OR %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_OR operations.
+# CHECK-LABEL: name: or_s32_gpr
+name:            or_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ORRWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_OR %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_OR operations.
+# CHECK-LABEL: name: or_s64_gpr
+name:            or_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = ORRXrr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_OR %0, %1
+...
+
+---
+# 64-bit G_OR on vector registers.
+# CHECK-LABEL: name: or_v2s32_fpr
+name:            or_v2s32_fpr
+legalized:       true
+regBankSelected: true
+#
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 2, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %d1
+# The actual OR does not matter as long as it is operating
+# on a 64-bit wide vector.
+# CHECK:    %2 = ORRv8i8 %0, %1
+body:             |
+  bb.0:
+    liveins: %d0, %d1
+
+      %0(<2 x s32>) = COPY %d0
+      %1(<2 x s32>) = COPY %d1
+      %2(<2 x s32>) = G_OR %0, %1
+...
+
+---
+# CHECK-LABEL: name: xor_s8_gpr
+name:            xor_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = EORWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s8) = COPY %w0
+    %1(s8) = COPY %w1
+    %2(s8) = G_XOR %0, %1
+...
+
+---
+# CHECK-LABEL: name: xor_s16_gpr
+name:            xor_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = EORWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_XOR %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_XOR operations.
+# CHECK-LABEL: name: xor_s32_gpr
+name:            xor_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = EORWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_XOR %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_XOR operations.
+# CHECK-LABEL: name: xor_s64_gpr
+name:            xor_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = EORXrr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_XOR %0, %1
+...
+
+---
+# CHECK-LABEL: name: and_s8_gpr
+name:            and_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ANDWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s8) = COPY %w0
+    %1(s8) = COPY %w1
+    %2(s8) = G_AND %0, %1
+...
+
+---
+# CHECK-LABEL: name: and_s16_gpr
+name:            and_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ANDWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_AND %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_AND operations.
+# CHECK-LABEL: name: and_s32_gpr
+name:            and_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ANDWrr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_AND %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_AND operations.
+# CHECK-LABEL: name: and_s64_gpr
+name:            and_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = ANDXrr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_AND %0, %1
+...
+
+---
+# CHECK-LABEL: name: shl_s8_gpr
+name:            shl_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = LSLVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s8) = COPY %w0
+    %1(s8) = COPY %w1
+    %2(s8) = G_SHL %0, %1
+...
+
+---
+# CHECK-LABEL: name: shl_s16_gpr
+name:            shl_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = LSLVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_SHL %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_SHL operations.
+# CHECK-LABEL: name: shl_s32_gpr
+name:            shl_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = LSLVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_SHL %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_SHL operations.
+# CHECK-LABEL: name: shl_s64_gpr
+name:            shl_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = LSLVXr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_SHL %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_LSHR operations.
+# CHECK-LABEL: name: lshr_s32_gpr
+name:            lshr_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = LSRVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_LSHR %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_LSHR operations.
+# CHECK-LABEL: name: lshr_s64_gpr
+name:            lshr_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = LSRVXr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_LSHR %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_ASHR operations.
+# CHECK-LABEL: name: ashr_s32_gpr
+name:            ashr_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = ASRVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_ASHR %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_ASHR operations.
+# CHECK-LABEL: name: ashr_s64_gpr
+name:            ashr_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = ASRVXr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_ASHR %0, %1
+...
+
+---
+# CHECK-LABEL: name: mul_s8_gpr
+name:            mul_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = MADDWrrr %0, %1, %wzr
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s8) = COPY %w0
+    %1(s8) = COPY %w1
+    %2(s8) = G_MUL %0, %1
+...
+
+---
+# CHECK-LABEL: name: mul_s16_gpr
+name:            mul_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = MADDWrrr %0, %1, %wzr
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s16) = COPY %w0
+    %1(s16) = COPY %w1
+    %2(s16) = G_MUL %0, %1
+...
+
+---
+# Check that we select s32 GPR G_MUL. This is trickier than other binops because
+# there is only MADDWrrr, and we have to use the WZR physreg.
+# CHECK-LABEL: name: mul_s32_gpr
+name:            mul_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = MADDWrrr %0, %1, %wzr
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_MUL %0, %1
+...
+
+---
+# Same as mul_s32_gpr for the s64 type.
+# CHECK-LABEL: name: mul_s64_gpr
+name:            mul_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = MADDXrrr %0, %1, %xzr
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_MUL %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_SDIV operations.
+# CHECK-LABEL: name: sdiv_s32_gpr
+name:            sdiv_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = SDIVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_SDIV %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_SDIV operations.
+# CHECK-LABEL: name: sdiv_s64_gpr
+name:            sdiv_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = SDIVXr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_SDIV %0, %1
+...
+
+---
+# Same as add_s32_gpr, for G_UDIV operations.
+# CHECK-LABEL: name: udiv_s32_gpr
+name:            udiv_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %w1
+# CHECK:    %2 = UDIVWr %0, %1
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s32) = G_UDIV %0, %1
+...
+
+---
+# Same as add_s64_gpr, for G_UDIV operations.
+# CHECK-LABEL: name: udiv_s64_gpr
+name:            udiv_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %x1
+# CHECK:    %2 = UDIVXr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_UDIV %0, %1
+...
+
+---
+# Check that we select an s32 FPR G_FADD into FADDSrr (the registers are on the FPR bank despite the _gpr suffix in the test name).
+# CHECK-LABEL: name: fadd_s32_gpr
+name:            fadd_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 2, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = COPY %s1
+# CHECK:    %2 = FADDSrr %0, %1
+body:             |
+  bb.0:
+    liveins: %s0, %s1
+
+    %0(s32) = COPY %s0
+    %1(s32) = COPY %s1
+    %2(s32) = G_FADD %0, %1
+...
+
+---
+# CHECK-LABEL: name: fadd_s64_gpr
+name:            fadd_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 2, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %d1
+# CHECK:    %2 = FADDDrr %0, %1
+body:             |
+  bb.0:
+    liveins: %d0, %d1
+
+    %0(s64) = COPY %d0
+    %1(s64) = COPY %d1
+    %2(s64) = G_FADD %0, %1
+...
+
+---
+# CHECK-LABEL: name: fsub_s32_gpr
+name:            fsub_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 2, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = COPY %s1
+# CHECK:    %2 = FSUBSrr %0, %1
+body:             |
+  bb.0:
+    liveins: %s0, %s1
+
+    %0(s32) = COPY %s0
+    %1(s32) = COPY %s1
+    %2(s32) = G_FSUB %0, %1
+...
+
+---
+# CHECK-LABEL: name: fsub_s64_gpr
+name:            fsub_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 2, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %d1
+# CHECK:    %2 = FSUBDrr %0, %1
+body:             |
+  bb.0:
+    liveins: %d0, %d1
+
+    %0(s64) = COPY %d0
+    %1(s64) = COPY %d1
+    %2(s64) = G_FSUB %0, %1
+...
+
+---
+# CHECK-LABEL: name: fmul_s32_gpr
+name:            fmul_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 2, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = COPY %s1
+# CHECK:    %2 = FMULSrr %0, %1
+body:             |
+  bb.0:
+    liveins: %s0, %s1
+
+    %0(s32) = COPY %s0
+    %1(s32) = COPY %s1
+    %2(s32) = G_FMUL %0, %1
+...
+
+---
+# CHECK-LABEL: name: fmul_s64_gpr
+name:            fmul_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 2, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %d1
+# CHECK:    %2 = FMULDrr %0, %1
+body:             |
+  bb.0:
+    liveins: %d0, %d1
+
+    %0(s64) = COPY %d0
+    %1(s64) = COPY %d1
+    %2(s64) = G_FMUL %0, %1
+...
+
+---
+# CHECK-LABEL: name: fdiv_s32_gpr
+name:            fdiv_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 2, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = COPY %s1
+# CHECK:    %2 = FDIVSrr %0, %1
+body:             |
+  bb.0:
+    liveins: %s0, %s1
+
+    %0(s32) = COPY %s0
+    %1(s32) = COPY %s1
+    %2(s32) = G_FDIV %0, %1
+...
+
+---
+# CHECK-LABEL: name: fdiv_s64_gpr
+name:            fdiv_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 2, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %d1
+# CHECK:    %2 = FDIVDrr %0, %1
+body:             |
+  bb.0:
+    liveins: %d0, %d1
+
+    %0(s64) = COPY %d0
+    %1(s64) = COPY %d1
+    %2(s64) = G_FDIV %0, %1
+...
+
+---
+# CHECK-LABEL: name: sitofp_s32_s32_fpr
+name:            sitofp_s32_s32_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = SCVTFUWSri %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s32) = G_SITOFP %0
+...
+
+---
+# CHECK-LABEL: name: sitofp_s32_s64_fpr
+name:            sitofp_s32_s64_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = SCVTFUXSri %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s32) = G_SITOFP %0
+...
+
+---
+# CHECK-LABEL: name: sitofp_s64_s32_fpr
+name:            sitofp_s64_s32_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = SCVTFUWDri %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s64) = G_SITOFP %0
+...
+
+---
+# CHECK-LABEL: name: sitofp_s64_s64_fpr
+name:            sitofp_s64_s64_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = SCVTFUXDri %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s64) = G_SITOFP %0
+...
+
+---
+# CHECK-LABEL: name: uitofp_s32_s32_fpr
+name:            uitofp_s32_s32_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = UCVTFUWSri %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s32) = G_UITOFP %0
+...
+
+---
+# CHECK-LABEL: name: uitofp_s32_s64_fpr
+name:            uitofp_s32_s64_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = UCVTFUXSri %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s32) = G_UITOFP %0
+...
+
+---
+# CHECK-LABEL: name: uitofp_s64_s32_fpr
+name:            uitofp_s64_s32_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = UCVTFUWDri %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s64) = G_UITOFP %0
+...
+
+---
+# CHECK-LABEL: name: uitofp_s64_s64_fpr
+name:            uitofp_s64_s64_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = UCVTFUXDri %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s64) = G_UITOFP %0
+...
+
+---
+# CHECK-LABEL: name: fptosi_s32_s32_gpr
+name:            fptosi_s32_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = FCVTZSUWSr %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s32) = G_FPTOSI %0
+...
+
+---
+# CHECK-LABEL: name: fptosi_s32_s64_gpr
+name:            fptosi_s32_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = FCVTZSUWDr %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s32) = G_FPTOSI %0
+...
+
+---
+# CHECK-LABEL: name: fptosi_s64_s32_gpr
+name:            fptosi_s64_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = FCVTZSUXSr %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s64) = G_FPTOSI %0
+...
+
+---
+# CHECK-LABEL: name: fptosi_s64_s64_gpr
+name:            fptosi_s64_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = FCVTZSUXDr %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s64) = G_FPTOSI %0
+...
+
+---
+# CHECK-LABEL: name: fptoui_s32_s32_gpr
+name:            fptoui_s32_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = FCVTZUUWSr %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s32) = G_FPTOUI %0
+...
+
+---
+# CHECK-LABEL: name: fptoui_s32_s64_gpr
+name:            fptoui_s32_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = FCVTZUUWDr %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s32) = G_FPTOUI %0
+...
+
+---
+# CHECK-LABEL: name: fptoui_s64_s32_gpr
+name:            fptoui_s64_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = FCVTZUUXSr %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s64) = G_FPTOUI %0
+...
+
+---
+# CHECK-LABEL: name: fptoui_s64_s64_gpr
+name:            fptoui_s64_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = FCVTZUUXDr %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s64) = G_FPTOUI %0
+...
+
+---
+# CHECK-LABEL: name: fptrunc
+name:            fptrunc
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = FCVTSDr %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s32) = G_FPTRUNC %0
+...
+
+---
+# CHECK-LABEL: name: fpext
+name:            fpext
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = FCVTDSr %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s64) = G_FPEXT %0
+...
+
+---
+# CHECK-LABEL: name: unconditional_br
+name:            unconditional_br
+legalized:       true
+regBankSelected: true
+
+# CHECK:  body:
+# CHECK:   bb.0:
+# CHECK:    successors: %bb.0
+# CHECK:    B %bb.0
+body:             |
+  bb.0:
+    successors: %bb.0
+
+    G_BR %bb.0
+...
+
+---
+# CHECK-LABEL: name: conditional_br
+name:            conditional_br
+legalized:       true
+regBankSelected: true
+
+registers:
+  - { id: 0, class: gpr }
+
+# CHECK:  body:
+# CHECK:   bb.0:
+# CHECK:    TBNZW %0, 0, %bb.1
+# CHECK:    B %bb.0
+body:             |
+  bb.0:
+    successors: %bb.0, %bb.1
+    %0(s1) = COPY %w0
+    G_BRCOND %0(s1), %bb.1
+    G_BR %bb.0
+
+  bb.1:
+...
+
+---
+# CHECK-LABEL: name: load_s64_gpr
+name:            load_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRXui %0, 0 :: (load 8 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s64) = G_LOAD  %0 :: (load 8 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s32_gpr
+name:            load_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRWui %0, 0 :: (load 4 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s32) = G_LOAD  %0 :: (load 4 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s16_gpr
+name:            load_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRHHui %0, 0 :: (load 2 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s16) = G_LOAD  %0 :: (load 2 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s8_gpr
+name:            load_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRBBui %0, 0 :: (load 1 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s8) = G_LOAD  %0 :: (load 1 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s64_fpr
+name:            load_s64_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRDui %0, 0 :: (load 8 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s64) = G_LOAD  %0 :: (load 8 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s32_fpr
+name:            load_s32_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRSui %0, 0 :: (load 4 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s32) = G_LOAD  %0 :: (load 4 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s16_fpr
+name:            load_s16_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: fpr16 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRHui %0, 0 :: (load 2 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s16) = G_LOAD  %0 :: (load 2 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: load_s8_fpr
+name:            load_s8_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: fpr8 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = LDRBui %0, 0 :: (load 1 from %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(p0) = COPY %x0
+    %1(s8) = G_LOAD  %0 :: (load 1 from %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: store_s64_gpr
+name:            store_s64_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = COPY %x1
+# CHECK: STRXui %1, %0, 0 :: (store 8 into %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(p0) = COPY %x0
+    %1(s64) = COPY %x1
+    G_STORE  %1, %0 :: (store 8 into %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: store_s32_gpr
+name:            store_s32_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = COPY %w1
+# CHECK: STRWui %1, %0, 0 :: (store 4 into %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0, %w1
+
+    %0(p0) = COPY %x0
+    %1(s32) = COPY %w1
+    G_STORE  %1, %0 :: (store 4 into %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: store_s16_gpr
+name:            store_s16_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = COPY %w1
+# CHECK: STRHHui %1, %0, 0 :: (store 2 into %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0, %w1
+
+    %0(p0) = COPY %x0
+    %1(s16) = COPY %w1
+    G_STORE  %1, %0 :: (store 2 into %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: store_s8_gpr
+name:            store_s8_gpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = COPY %w1
+# CHECK: STRBBui %1, %0, 0 :: (store 1 into %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0, %w1
+
+    %0(p0) = COPY %x0
+    %1(s8) = COPY %w1
+    G_STORE  %1, %0 :: (store 1 into %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: store_s64_fpr
+name:            store_s64_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = COPY %d1
+# CHECK: STRDui %1, %0, 0 :: (store 8 into %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0, %d1
+
+    %0(p0) = COPY %x0
+    %1(s64) = COPY %d1
+    G_STORE  %1, %0 :: (store 8 into %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: store_s32_fpr
+name:            store_s32_fpr
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK: %0 = COPY %x0
+# CHECK: %1 = COPY %s1
+# CHECK: STRSui %1, %0, 0 :: (store 4 into %ir.addr)
+body:             |
+  bb.0:
+    liveins: %x0, %s1
+
+    %0(p0) = COPY %x0
+    %1(s32) = COPY %s1
+    G_STORE  %1, %0 :: (store 4 into %ir.addr)
+
+...
+
+---
+# CHECK-LABEL: name: frame_index
+name:            frame_index
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+registers:
+  - { id: 0, class: gpr }
+
+stack:
+  - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 }
+
+# CHECK:  body:
+# CHECK: %0 = ADDXri %stack.0.ptr0, 0, 0
+body:             |
+  bb.0:
+    %0(p0) = G_FRAME_INDEX %stack.0.ptr0
+...
+
+---
+# Check that we set the "selected" property.
+# CHECK-LABEL: name: selected_property
+# CHECK: legalized: true
+# CHECK-NEXT: regBankSelected: true
+# CHECK-NEXT: selected: true
+name:            selected_property
+legalized:       true
+regBankSelected: true
+selected:        false
+body:             |
+  bb.0:
+...
+
+---
+# CHECK-LABEL: name: const_s32
+name:            const_s32
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = MOVi32imm 42
+body:             |
+  bb.0:
+    %0(s32) = G_CONSTANT i32 42
+...
+
+---
+# CHECK-LABEL: name: const_s64
+name:            const_s64
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+# CHECK:  body:
+# CHECK: %0 = MOVi64imm 1234567890123
+body:             |
+  bb.0:
+    %0(s64) = G_CONSTANT i64 1234567890123
+...
+
+---
+# CHECK-LABEL: name: fconst_s32
+name:            fconst_s32
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: fpr }
+
+# CHECK:  body:
+# CHECK: [[TMP:%[0-9]+]] = MOVi32imm 1080033280
+# CHECK: %0 = COPY [[TMP]]
+body:             |
+  bb.0:
+    %0(s32) = G_FCONSTANT float 3.5
+...
+
+---
+# CHECK-LABEL: name: fconst_s64
+name:            fconst_s64
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: fpr }
+
+# CHECK:  body:
+# CHECK: [[TMP:%[0-9]+]] = MOVi64imm 4607182418800017408
+# CHECK: %0 = COPY [[TMP]]
+body:             |
+  bb.0:
+    %0(s64) = G_FCONSTANT double 1.0
+...
+
+---
+# CHECK-LABEL: name: gep
+name:            gep
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+
+# CHECK:  body:
+# CHECK: %1 = MOVi64imm 42
+# CHECK: %2 = ADDXrr %0, %1
+body:             |
+  bb.0:
+    liveins: %x0
+    %0(p0) = COPY %x0
+    %1(s64) = G_CONSTANT i64 42
+    %2(p0) = G_GEP %0, %1(s64)
+...
+
+---
+# Global defined in the same linkage unit, so no GOT access is needed (IOS and LINUX-DEFAULT expect MOVaddr); only the LINUX-PIC prefix still expects a LOADgot.
+# CHECK-LABEL: name: global_local
+name:            global_local
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+# CHECK:  body:
+# IOS: %0 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local
+# LINUX-DEFAULT: %0 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local
+# LINUX-PIC: %0 = LOADgot target-flags(aarch64-got) @var_local
+body:             |
+  bb.0:
+    %0(p0) = G_GLOBAL_VALUE @var_local
+...
+
+---
+# CHECK-LABEL: name: global_got
+name:            global_got
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+# CHECK:  body:
+# IOS: %0 = LOADgot target-flags(aarch64-got) @var_got
+# LINUX-DEFAULT: %0 = MOVaddr target-flags(aarch64-page) @var_got, target-flags(aarch64-pageoff, aarch64-nc) @var_got
+# LINUX-PIC: %0 = LOADgot target-flags(aarch64-got) @var_got
+body:             |
+  bb.0:
+    %0(p0) = G_GLOBAL_VALUE @var_got
+...
+
+---
+# CHECK-LABEL: name: trunc
+name:            trunc
+legalized:       true
+regBankSelected: true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 4, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %1 = COPY %0
+# CHECK:    %3 = COPY %2.sub_32
+# CHECK:    %4 = COPY %2.sub_32
+body:             |
+  bb.0:
+    liveins: %w0, %x0
+
+    %0(s32) = COPY %w0
+    %1(s1) = G_TRUNC %0
+
+    %2(s64) = COPY %x0
+    %3(s32) = G_TRUNC %2
+    %4(s8) = G_TRUNC %2
+...
+
+---
+# CHECK-LABEL: name: anyext_gpr
+name:            anyext_gpr
+legalized:       true
+regBankSelected: true
+# G_ANYEXT s32 -> s64 is expected to become SUBREG_TO_REG into a new vreg (%4, not declared in the input) plus a COPY; s8 -> s32 is just a COPY.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32all }
+# CHECK-NEXT:  - { id: 1, class: gpr64all }
+# CHECK-NEXT:  - { id: 2, class: gpr32all }
+# CHECK-NEXT:  - { id: 3, class: gpr32all }
+# CHECK-NEXT:  - { id: 4, class: gpr64all }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %4 = SUBREG_TO_REG 0, %0, 15
+# CHECK:    %1 = COPY %4
+# CHECK:    %2 = COPY %w0
+# CHECK:    %3 = COPY %2
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s64) = G_ANYEXT %0
+    %2(s8) = COPY %w0
+    %3(s32) = G_ANYEXT %2
+...
+
+---
+# CHECK-LABEL: name: zext_gpr
+name:            zext_gpr
+legalized:       true
+regBankSelected: true
+# G_ZEXT is expected to select to UBFM: s32 -> s64 first widens the source with SUBREG_TO_REG (new vreg %5), the s8 sources use UBFMWri.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 4, class: gpr32 }
+# CHECK-NEXT:  - { id: 5, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %5 = SUBREG_TO_REG 0, %0, 15
+# CHECK:    %1 = UBFMXri %5, 0, 31
+# CHECK:    %2 = COPY %w0
+# CHECK:    %3 = UBFMWri %2, 0, 7
+# CHECK:    %4 = UBFMWri %2, 0, 7
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s64) = G_ZEXT %0
+    %2(s8) = COPY %w0
+    %3(s32) = G_ZEXT %2
+    %4(s16) = G_ZEXT %2
+...
+
+---
+# CHECK-LABEL: name: sext_gpr
+name:            sext_gpr
+legalized:       true
+regBankSelected: true
+# G_SEXT is expected to select to SBFM: s32 -> s64 first widens the source with SUBREG_TO_REG (new vreg %5), the s8 sources use SBFMWri.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 4, class: gpr32 }
+# CHECK-NEXT:  - { id: 5, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %5 = SUBREG_TO_REG 0, %0, 15
+# CHECK:    %1 = SBFMXri %5, 0, 31
+# CHECK:    %2 = COPY %w0
+# CHECK:    %3 = SBFMWri %2, 0, 7
+# CHECK:    %4 = SBFMWri %2, 0, 7
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s64) = G_SEXT %0
+    %2(s8) = COPY %w0
+    %3(s32) = G_SEXT %2
+    %4(s16) = G_SEXT %2
+...
+
+---
+# CHECK-LABEL: name: casts
+name:            casts
+legalized:       true
+regBankSelected: true
+# G_BITCAST, G_INTTOPTR and G_PTRTOINT are all expected to select to COPY; pointer-to-narrower-integer results copy the sub_32 subregister.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64all }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 3, class: gpr64 }
+# CHECK-NEXT:  - { id: 4, class: gpr32 }
+# CHECK-NEXT:  - { id: 5, class: gpr32 }
+# CHECK-NEXT:  - { id: 6, class: gpr32 }
+# CHECK-NEXT:  - { id: 7, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+  - { id: 7, class: gpr }
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %0
+# CHECK:    %2 = COPY %0
+# CHECK:    %3 = COPY %2
+# CHECK:    %4 = COPY %2.sub_32
+# CHECK:    %5 = COPY %2.sub_32
+# CHECK:    %6 = COPY %2.sub_32
+# CHECK:    %7 = COPY %2.sub_32
+body:             |
+  bb.0:
+    liveins: %x0
+    %0(s64) = COPY %x0
+    %1(<8 x s8>) = G_BITCAST %0(s64)
+    %2(p0) = G_INTTOPTR %0
+
+    %3(s64) = G_PTRTOINT %2
+    %4(s32) = G_PTRTOINT %2
+    %5(s16) = G_PTRTOINT %2
+    %6(s8) = G_PTRTOINT %2
+    %7(s1) = G_PTRTOINT %2
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_gpr
+name:            bitcast_s32_gpr
+legalized:       true
+regBankSelected: true
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32all }
+# CHECK-NEXT:  - { id: 1, class: gpr32all }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# s32 G_BITCAST with source and result both on the GPR bank: expected to become a plain COPY between gpr32all vregs.
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s32) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_fpr
+name:            bitcast_s32_fpr
+legalized:       true
+regBankSelected: true
+# s32 G_BITCAST with source and result both on the FPR bank: expected to become a plain COPY between fpr32 vregs.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s32) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_gpr_fpr
+name:            bitcast_s32_gpr_fpr
+legalized:       true
+regBankSelected: true
+# s32 G_BITCAST from a GPR-bank source to an FPR-bank result: expected to become a cross-bank COPY (gpr32all -> fpr32).
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32all }
+# CHECK-NEXT:  - { id: 1, class: fpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %w0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s32) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_fpr_gpr
+name:            bitcast_s32_fpr_gpr
+legalized:       true
+regBankSelected: true
+# s32 G_BITCAST from an FPR-bank source to a GPR-bank result: expected to become a cross-bank COPY (fpr32 -> gpr32all).
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32all }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %s0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(s32) = COPY %s0
+    %1(s32) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_gpr
+name:            bitcast_s64_gpr
+legalized:       true
+regBankSelected: true
+# s64 G_BITCAST with source and result both on the GPR bank: expected to become a plain COPY between gpr64all vregs.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64all }
+# CHECK-NEXT:  - { id: 1, class: gpr64all }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s64) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_fpr
+name:            bitcast_s64_fpr
+legalized:       true
+regBankSelected: true
+# s64 G_BITCAST with source and result both on the FPR bank: expected to become a plain COPY between fpr64 vregs.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s64) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_gpr_fpr
+name:            bitcast_s64_gpr_fpr
+legalized:       true
+regBankSelected: true
+# s64 G_BITCAST from a GPR-bank source to an FPR-bank result: expected to become a cross-bank COPY (gpr64all -> fpr64).
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64all }
+# CHECK-NEXT:  - { id: 1, class: fpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+# CHECK:  body:
+# CHECK:    %0 = COPY %x0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s64) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_fpr_gpr
+name:            bitcast_s64_fpr_gpr
+legalized:       true
+regBankSelected: true
+# s64 G_BITCAST from an FPR-bank source to a GPR-bank result: expected to become a cross-bank COPY (fpr64 -> gpr64all).
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64all }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+
+# CHECK:  body:
+# CHECK:    %0 = COPY %d0
+# CHECK:    %1 = COPY %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(s64) = COPY %d0
+    %1(s64) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: icmp
+name:            icmp
+legalized:       true
+regBankSelected: true
+# G_ICMP is expected to select to a flag-setting SUBS into the zero register followed by CSINCWr; s64 and p0 operands use SUBSXrr.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 4, class: gpr64 }
+# CHECK-NEXT:  - { id: 5, class: gpr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+# NOTE(review): the CSINCWr condition operand appears to be the predicate's own code rather than its inverse - confirm the result polarity against the selector.
+# CHECK:  body:
+# CHECK:    %wzr = SUBSWrr %0, %0, implicit-def %nzcv
+# CHECK:    %1 = CSINCWr %wzr, %wzr, 0, implicit %nzcv
+
+# CHECK:    %xzr = SUBSXrr %2, %2, implicit-def %nzcv
+# CHECK:    %3 = CSINCWr %wzr, %wzr, 2, implicit %nzcv
+
+# CHECK:    %xzr = SUBSXrr %4, %4, implicit-def %nzcv
+# CHECK:    %5 = CSINCWr %wzr, %wzr, 1, implicit %nzcv
+
+body:             |
+  bb.0:
+    liveins: %w0, %x0
+
+    %0(s32) = COPY %w0
+    %1(s1) = G_ICMP intpred(eq), %0, %0
+
+    %2(s64) = COPY %x0
+    %3(s1) = G_ICMP intpred(uge), %2, %2
+
+    %4(p0) = COPY %x0
+    %5(s1) = G_ICMP intpred(ne), %4, %4
+...
+
+---
+# CHECK-LABEL: name: fcmp
+name:            fcmp
+legalized:       true
+regBankSelected: true
+# G_FCMP is expected to select to FCMPSrr/FCMPDrr plus CSINCWr; the 'one' predicate needs two condition tests that are ORed together.
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: fpr64 }
+# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 4, class: gpr32 }
+# CHECK-NEXT:  - { id: 5, class: gpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr }
+  - { id: 3, class: gpr }
+
+# CHECK:  body:
+# CHECK:    FCMPSrr %0, %0, implicit-def %nzcv
+# CHECK:    [[TST_MI:%[0-9]+]] = CSINCWr %wzr, %wzr, 4, implicit %nzcv
+# CHECK:    [[TST_GT:%[0-9]+]] = CSINCWr %wzr, %wzr, 12, implicit %nzcv
+# CHECK:    %1 = ORRWrr [[TST_MI]], [[TST_GT]]
+
+# CHECK:    FCMPDrr %2, %2, implicit-def %nzcv
+# CHECK:    %3 = CSINCWr %wzr, %wzr, 5, implicit %nzcv
+
+body:             |
+  bb.0:
+    liveins: %s0, %d0
+
+    %0(s32) = COPY %s0
+    %1(s1) = G_FCMP floatpred(one), %0, %0
+
+    %2(s64) = COPY %d0
+    %3(s1) = G_FCMP floatpred(uge), %2, %2
+
+...
+
+---
+# CHECK-LABEL: name: phi
+name:            phi
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+# A PHI in a self-looping block is expected to survive selection with the same operands; its vregs get concrete classes (fpr32 here).
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: fpr32 }
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr }
+
+# CHECK:  body:
+# CHECK:    bb.1:
+# CHECK:      %2 = PHI %0, %bb.0, %2, %bb.1
+
+body:             |
+  bb.0:
+    liveins: %s0, %w0
+    successors: %bb.1
+    %0(s32) = COPY %s0
+    %1(s1) = COPY %w0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2(s32) = PHI %0, %bb.0, %2, %bb.1
+    G_BRCOND %1, %bb.1
+
+  bb.2:
+    %s0 = COPY %2
+    RET_ReallyLR implicit %s0
+...
+
+---
+# CHECK-LABEL: name: select
+name:            select
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+# G_SELECT on an s1 condition is expected to test the condition with ANDSWri and then pick an operand with CSELWr (s32) or CSELXr (s64).
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr32 }
+# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 4, class: gpr64 }
+# CHECK-NEXT:  - { id: 5, class: gpr64 }
+# CHECK-NEXT:  - { id: 6, class: gpr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+
+# CHECK:  body:
+# CHECK:      %wzr = ANDSWri %0, 0, implicit-def %nzcv
+# CHECK:      %3 = CSELWr %1, %2, 1, implicit %nzcv
+# CHECK:      %wzr = ANDSWri %0, 0, implicit-def %nzcv
+# CHECK:      %6 = CSELXr %4, %5, 1, implicit %nzcv
+body:             |
+  bb.0:
+    liveins: %w0, %w1, %w2, %x0, %x1
+    %0(s1) = COPY %w0
+
+    %1(s32) = COPY %w1
+    %2(s32) = COPY %w2
+    %3(s32) = G_SELECT %0, %1, %2
+
+    %4(s64) = COPY %x0
+    %5(s64) = COPY %x1
+    %6(s64) = G_SELECT %0, %4, %5
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
new file mode 100644
index 000000000000..579ef777223c
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=aarch64-apple-ios %s -stop-after=irtranslator -o - -global-isel | FileCheck %s
+
+; llvm.stackprotector is expected to become a LOAD_STACK_GUARD stored (volatile) into the guard slot, with no second stack object.
+; CHECK-LABEL: name: test_stack_guard_remat2
+
+; CHECK: stack:
+; CHECK:  - { id: 0, name: StackGuardSlot, offset: 0, size: 8, alignment: 8 }
+; CHECK-NOT: id: 1
+
+; CHECK: [[GUARD_SLOT:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.StackGuardSlot
+; CHECK: [[GUARD:%[0-9]+]](p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard)
+; CHECK: G_STORE [[GUARD]](p0), [[GUARD_SLOT]](p0) :: (volatile store 8 into %stack.0.StackGuardSlot)
+declare void @llvm.stackprotector(i8*, i8**)
+define void @test_stack_guard_remat2() {
+  %StackGuardSlot = alloca i8*
+  call void @llvm.stackprotector(i8* undef, i8** %StackGuardSlot)
+  ret void
+}
+
+@__stack_chk_guard = external global i64*
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 7d416d9b0add..e023e32bb7b1 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1,15 +1,15 @@
-; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
-; REQUIRES: global-isel
+; RUN: llc -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
 ; This file checks that the translation from llvm IR to generic MachineInstr
 ; is correct.
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-apple-ios"
+target triple = "aarch64--"
 
 ; Tests for add.
-; CHECK: name: addi64
-; CHECK: [[ARG1:%[0-9]+]](64) = COPY %x0
-; CHECK-NEXT: [[ARG2:%[0-9]+]](64) = COPY %x1
-; CHECK-NEXT: [[RES:%[0-9]+]](64) = G_ADD i64 [[ARG1]], [[ARG2]]
+; CHECK-LABEL: name: addi64
+; CHECK:      [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_ADD [[ARG1]], [[ARG2]]
 ; CHECK-NEXT: %x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit %x0 
 define i64 @addi64(i64 %arg1, i64 %arg2) {
@@ -17,18 +17,48 @@ define i64 @addi64(i64 %arg1, i64 %arg2) {
   ret i64 %res
 }
 
+; CHECK-LABEL: name: muli64
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_MUL [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %x0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %x0
+define i64 @muli64(i64 %arg1, i64 %arg2) {
+  %res = mul i64 %arg1, %arg2
+  ret i64 %res
+}
+
+; Tests for alloca: default alignment, explicit "align 1", an array allocation (i32 16) and a zero-length array type.
+; CHECK-LABEL: name: allocai64
+; CHECK: stack:
+; CHECK-NEXT:   - { id: 0, name: ptr1, offset: 0, size: 8, alignment: 8 }
+; CHECK-NEXT:   - { id: 1, name: ptr2, offset: 0, size: 8, alignment: 1 }
+; CHECK-NEXT:   - { id: 2, name: ptr3, offset: 0, size: 128, alignment: 8 }
+; CHECK-NEXT:   - { id: 3, name: ptr4, offset: 0, size: 1, alignment: 8 }
+; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.0.ptr1
+; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.1.ptr2
+; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.2.ptr3
+; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.3.ptr4
+define void @allocai64() {
+  %ptr1 = alloca i64
+  %ptr2 = alloca i64, align 1
+  %ptr3 = alloca i64, i32 16
+  %ptr4 = alloca [0 x i64]
+  ret void
+}
+
 ; Tests for br.
-; CHECK: name: uncondbr
+; CHECK-LABEL: name: uncondbr
 ; CHECK: body:
 ;
-; Entry basic block.
-; CHECK: {{[0-9a-zA-Z._-]+}}:
+; ABI/constant lowering and IR-level entry basic block.
+; CHECK: {{bb\.[0-9]+}}:
 ;
 ; Make sure we have one successor and only one.
-; CHECK-NEXT: successors: %[[END:[0-9a-zA-Z._-]+]]({{0x[a-f0-9]+ / 0x[a-f0-9]+}} = 100.00%)
+; CHECK-NEXT: successors: %[[END:bb\.[0-9]+]](0x80000000)
 ;
 ; Check that we emit the correct branch.
-; CHECK: G_BR label %[[END]]
+; CHECK: G_BR %[[END]]
 ;
 ; Check that end contains the return instruction.
 ; CHECK: [[END]]:
@@ -39,11 +69,42 @@ end:
   ret void
 }
 
+; Tests for conditional br.
+; CHECK-LABEL: name: condbr
+; CHECK: body:
+;
+; ABI/constant lowering and IR-level entry basic block.
+; CHECK: {{bb\.[0-9]+}}:
+; Make sure we have two successors
+; CHECK-NEXT: successors: %[[TRUE:bb\.[0-9]+]](0x40000000),
+; CHECK:                  %[[FALSE:bb\.[0-9]+]](0x40000000)
+;
+; CHECK: [[ADDR:%.*]](p0) = COPY %x0
+;
+; Check that we emit the correct branch.
+; CHECK: [[TST:%.*]](s1) = G_LOAD [[ADDR]](p0)
+; CHECK: G_BRCOND [[TST]](s1), %[[TRUE]]
+; CHECK: G_BR %[[FALSE]]
+;
+; Check that each successor contains the return instruction.
+; CHECK: [[TRUE]]:
+; CHECK-NEXT: RET_ReallyLR
+; CHECK: [[FALSE]]:
+; CHECK-NEXT: RET_ReallyLR
+define void @condbr(i1* %tstaddr) {
+  %tst = load i1, i1* %tstaddr
+  br i1 %tst, label %true, label %false
+true:
+  ret void
+false:
+  ret void
+}
+
 ; Tests for or.
-; CHECK: name: ori64
-; CHECK: [[ARG1:%[0-9]+]](64) = COPY %x0
-; CHECK-NEXT: [[ARG2:%[0-9]+]](64) = COPY %x1
-; CHECK-NEXT: [[RES:%[0-9]+]](64) = G_OR i64 [[ARG1]], [[ARG2]]
+; CHECK-LABEL: name: ori64
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_OR [[ARG1]], [[ARG2]]
 ; CHECK-NEXT: %x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit %x0
 define i64 @ori64(i64 %arg1, i64 %arg2) {
@@ -51,13 +112,833 @@ define i64 @ori64(i64 %arg1, i64 %arg2) {
   ret i64 %res
 }
 
-; CHECK: name: ori32
-; CHECK: [[ARG1:%[0-9]+]](32) = COPY %w0
-; CHECK-NEXT: [[ARG2:%[0-9]+]](32) = COPY %w1
-; CHECK-NEXT: [[RES:%[0-9]+]](32) = G_OR i32 [[ARG1]], [[ARG2]]
+; CHECK-LABEL: name: ori32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_OR [[ARG1]], [[ARG2]]
 ; CHECK-NEXT: %w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit %w0
 define i32 @ori32(i32 %arg1, i32 %arg2) {
   %res = or i32 %arg1, %arg2
   ret i32 %res
 }
+
+; Tests for xor: the IR xor is expected to become a single G_XOR of the two argument copies.
+; CHECK-LABEL: name: xori64
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_XOR [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %x0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %x0
+define i64 @xori64(i64 %arg1, i64 %arg2) {
+  %res = xor i64 %arg1, %arg2
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: xori32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_XOR [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @xori32(i32 %arg1, i32 %arg2) {
+  %res = xor i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; Tests for and: the IR and is expected to become a single G_AND of the two argument copies.
+; CHECK-LABEL: name: andi64
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_AND [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %x0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %x0
+define i64 @andi64(i64 %arg1, i64 %arg2) {
+  %res = and i64 %arg1, %arg2
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: andi32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_AND [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @andi32(i32 %arg1, i32 %arg2) {
+  %res = and i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; Tests for sub: the IR sub is expected to become a single G_SUB of the two argument copies, in the same operand order.
+; CHECK-LABEL: name: subi64
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_SUB [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %x0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %x0
+define i64 @subi64(i64 %arg1, i64 %arg2) {
+  %res = sub i64 %arg1, %arg2
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: subi32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SUB [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @subi32(i32 %arg1, i32 %arg2) {
+  %res = sub i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: ptrtoint
+; CHECK: [[ARG1:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[RES:%[0-9]+]](s64) = G_PTRTOINT [[ARG1]]
+; CHECK: %x0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit %x0
+define i64 @ptrtoint(i64* %a) {
+  %val = ptrtoint i64* %a to i64
+  ret i64 %val
+}
+
+; CHECK-LABEL: name: inttoptr
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK: [[RES:%[0-9]+]](p0) = G_INTTOPTR [[ARG1]]
+; CHECK: %x0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit %x0
+define i64* @inttoptr(i64 %a) {
+  %val = inttoptr i64 %a to i64*
+  ret i64* %val
+}
+
+; CHECK-LABEL: name: trivial_bitcast
+; CHECK: [[ARG1:%[0-9]+]](p0) = COPY %x0
+; CHECK: %x0 = COPY [[ARG1]]
+; CHECK: RET_ReallyLR implicit %x0
+define i64* @trivial_bitcast(i8* %a) {
+  %val = bitcast i8* %a to i64*
+  ret i64* %val
+}
+
+; CHECK-LABEL: name: trivial_bitcast_with_copy
+; CHECK:     [[A:%[0-9]+]](p0) = COPY %x0
+; CHECK:     G_BR %[[CAST:bb\.[0-9]+]]
+; The pointer-to-pointer bitcast sits in its own block, where it is expected to show up as a COPY of the argument.
+; CHECK: [[CAST]]:
+; CHECK:     {{%[0-9]+}}(p0) = COPY [[A]]
+; CHECK:     G_BR %[[END:bb\.[0-9]+]]
+
+; CHECK: [[END]]:
+define i64* @trivial_bitcast_with_copy(i8* %a) {
+  br label %cast
+
+end:
+  ret i64* %val
+
+cast:
+  %val = bitcast i8* %a to i64*
+  br label %end
+}
+
+; CHECK-LABEL: name: bitcast
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK: [[RES1:%[0-9]+]](<2 x s32>) = G_BITCAST [[ARG1]]
+; CHECK: [[RES2:%[0-9]+]](s64) = G_BITCAST [[RES1]]
+; CHECK: %x0 = COPY [[RES2]]
+; CHECK: RET_ReallyLR implicit %x0
+define i64 @bitcast(i64 %a) {
+  %res1 = bitcast i64 %a to <2 x i32>
+  %res2 = bitcast <2 x i32> %res1 to i64
+  ret i64 %res2
+}
+
+; CHECK-LABEL: name: trunc
+; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0
+; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_LOAD
+; CHECK: [[RES1:%[0-9]+]](s8) = G_TRUNC [[ARG1]]
+; CHECK: [[RES2:%[0-9]+]](<4 x s16>) = G_TRUNC [[VEC]]
+define void @trunc(i64 %a) {
+  %vecptr = alloca <4 x i32>
+  %vec = load <4 x i32>, <4 x i32>* %vecptr
+  %res1 = trunc i64 %a to i8
+  %res2 = trunc <4 x i32> %vec to <4 x i16>
+  ret void
+}
+
+; CHECK-LABEL: name: load
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[ADDR42:%[0-9]+]](p42) = COPY %x1
+; CHECK: [[VAL1:%[0-9]+]](s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 16)
+; CHECK: [[VAL2:%[0-9]+]](s64) = G_LOAD [[ADDR42]](p42) :: (load 8 from %ir.addr42)
+; CHECK: [[SUM2:%.*]](s64) = G_ADD [[VAL1]], [[VAL2]]
+; CHECK: [[VAL3:%[0-9]+]](s64) = G_LOAD [[ADDR]](p0) :: (volatile load 8 from %ir.addr)
+; CHECK: [[SUM3:%[0-9]+]](s64) = G_ADD [[SUM2]], [[VAL3]]
+; CHECK: %x0 = COPY [[SUM3]]
+; CHECK: RET_ReallyLR implicit %x0
+define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) {
+  %val1 = load i64, i64* %addr, align 16
+
+  %val2 = load i64, i64 addrspace(42)* %addr42
+  %sum2 = add i64 %val1, %val2
+
+  %val3 = load volatile i64, i64* %addr
+  %sum3 = add i64 %sum2, %val3
+  ret i64 %sum3
+}
+
+; CHECK-LABEL: name: store
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[ADDR42:%[0-9]+]](p42) = COPY %x1
+; CHECK: [[VAL1:%[0-9]+]](s64) = COPY %x2
+; CHECK: [[VAL2:%[0-9]+]](s64) = COPY %x3
+; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 16)
+; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store 8 into %ir.addr42)
+; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store 8 into %ir.addr)
+; CHECK: RET_ReallyLR
+define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) {
+  store i64 %val1, i64* %addr, align 16
+  store i64 %val2, i64 addrspace(42)* %addr42
+  store volatile i64 %val1, i64* %addr
+  %sum = add i64 %val1, %val2
+  ret void
+}
+
+; CHECK-LABEL: name: intrinsics
+; CHECK: [[CUR:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[BITS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[PTR:%[0-9]+]](p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+; CHECK: [[PTR_VEC:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.ptr.vec
+; CHECK: [[VEC:%[0-9]+]](<8 x s8>) = G_LOAD [[PTR_VEC]]
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0)
+; CHECK: RET_ReallyLR
+declare i8* @llvm.returnaddress(i32)
+declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*)
+define void @intrinsics(i32 %cur, i32 %bits) {
+  %ptr = call i8* @llvm.returnaddress(i32 0)
+  %ptr.vec = alloca <8 x i8>
+  %vec = load <8 x i8>, <8 x i8>* %ptr.vec
+  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec, <8 x i8> %vec, i8* %ptr)
+  ret void
+}
+
+; CHECK-LABEL: name: test_phi
+; CHECK:     G_BRCOND {{%.*}}, %[[TRUE:bb\.[0-9]+]]
+; CHECK:     G_BR %[[FALSE:bb\.[0-9]+]]
+
+; CHECK: [[TRUE]]:
+; CHECK:     [[RES1:%[0-9]+]](s32) = G_LOAD
+
+; CHECK: [[FALSE]]:
+; CHECK:     [[RES2:%[0-9]+]](s32) = G_LOAD
+
+; CHECK:     [[RES:%[0-9]+]](s32) = PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]]
+; CHECK:     %w0 = COPY [[RES]]
+define i32 @test_phi(i32* %addr1, i32* %addr2, i1 %tst) {
+  br i1 %tst, label %true, label %false
+
+true:
+  %res1 = load i32, i32* %addr1
+  br label %end
+
+false:
+  %res2 = load i32, i32* %addr2
+  br label %end
+
+end:
+  %res = phi i32 [%res1, %true], [%res2, %false]
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: unreachable
+; CHECK: G_ADD
+; CHECK-NEXT: {{^$}}
+; CHECK-NEXT: ...
+define void @unreachable(i32 %a) {
+  %sum = add i32 %a, %a
+  unreachable
+}
+
+; It's important that constants are materialized after argument passing, but
+; before the rest of the entry block (here: before the G_BR to %next).
+; CHECK-LABEL: name: constant_int
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[ONE:%[0-9]+]](s32) = G_CONSTANT i32 1
+; CHECK: G_BR
+
+; CHECK: [[SUM1:%[0-9]+]](s32) = G_ADD [[IN]], [[ONE]]
+; CHECK: [[SUM2:%[0-9]+]](s32) = G_ADD [[IN]], [[ONE]]
+; CHECK: [[RES:%[0-9]+]](s32) = G_ADD [[SUM1]], [[SUM2]]
+; CHECK: %w0 = COPY [[RES]]
+
+define i32 @constant_int(i32 %in) {
+  br label %next
+
+next:
+  %sum1 = add i32 %in, 1
+  %sum2 = add i32 %in, 1
+  %res = add i32 %sum1, %sum2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: constant_int_start
+; CHECK: [[TWO:%[0-9]+]](s32) = G_CONSTANT i32 2
+; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42
+; CHECK: [[RES:%[0-9]+]](s32) = G_ADD [[TWO]], [[ANSWER]]
+define i32 @constant_int_start() {
+  %res = add i32 2, 42
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_undef
+; CHECK: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF
+; CHECK: %w0 = COPY [[UNDEF]]
+define i32 @test_undef() {
+  ret i32 undef
+}
+
+; CHECK-LABEL: name: test_constant_inttoptr
+; CHECK: [[ONE:%[0-9]+]](s64) = G_CONSTANT i64 1
+; CHECK: [[PTR:%[0-9]+]](p0) = G_INTTOPTR [[ONE]]
+; CHECK: %x0 = COPY [[PTR]]
+define i8* @test_constant_inttoptr() {
+  ret i8* inttoptr(i64 1 to i8*)
+}
+
+; This used to fail purely because the Constant -> VReg map was kept across
+; functions; this function reuses the "i64 1" from test_constant_inttoptr above.
+; CHECK-LABEL: name: test_reused_constant
+; CHECK: [[ONE:%[0-9]+]](s64) = G_CONSTANT i64 1
+; CHECK: %x0 = COPY [[ONE]]
+define i64 @test_reused_constant() {
+  ret i64 1
+}
+
+; CHECK-LABEL: name: test_sext
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEXT [[IN]]
+; CHECK: %x0 = COPY [[RES]]
+define i64 @test_sext(i32 %in) {
+  %res = sext i32 %in to i64
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: test_zext
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RES:%[0-9]+]](s64) = G_ZEXT [[IN]]
+; CHECK: %x0 = COPY [[RES]]
+define i64 @test_zext(i32 %in) {
+  %res = zext i32 %in to i64
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: test_shl
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SHL [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_shl(i32 %arg1, i32 %arg2) {
+  %res = shl i32 %arg1, %arg2
+  ret i32 %res
+}
+
+
+; CHECK-LABEL: name: test_lshr
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_LSHR [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_lshr(i32 %arg1, i32 %arg2) {
+  %res = lshr i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_ashr
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_ASHR [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_ashr(i32 %arg1, i32 %arg2) {
+  %res = ashr i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_sdiv
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SDIV [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_sdiv(i32 %arg1, i32 %arg2) {
+  %res = sdiv i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_udiv
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_UDIV [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_udiv(i32 %arg1, i32 %arg2) {
+  %res = udiv i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_srem
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SREM [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_srem(i32 %arg1, i32 %arg2) {
+  %res = srem i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_urem
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_UREM [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @test_urem(i32 %arg1, i32 %arg2) {
+  %res = urem i32 %arg1, %arg2
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_constant_null
+; CHECK: [[NULL:%[0-9]+]](p0) = G_CONSTANT i64 0
+; CHECK: %x0 = COPY [[NULL]]
+define i8* @test_constant_null() {
+  ret i8* null
+}
+
+; CHECK-LABEL: name: test_struct_memops
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[VAL:%[0-9]+]](s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 4)
+; CHECK: G_STORE [[VAL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 4)
+define void @test_struct_memops({ i8, i32 }* %addr) {
+  %val = load { i8, i32 }, { i8, i32 }* %addr
+  store { i8, i32 } %val, { i8, i32 }* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_i1_memops
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[VAL:%[0-9]+]](s1) = G_LOAD [[ADDR]](p0) :: (load 1 from %ir.addr)
+; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store 1 into %ir.addr)
+define void @test_i1_memops(i1* %addr) {
+  %val = load i1, i1* %addr
+  store i1 %val, i1* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: int_comparison
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[TST:%[0-9]+]](s1) = G_ICMP intpred(ne), [[LHS]](s32), [[RHS]]
+; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0)
+define void @int_comparison(i32 %a, i32 %b, i1* %addr) {
+  %res = icmp ne i32 %a, %b
+  store i1 %res, i1* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: ptr_comparison
+; CHECK: [[LHS:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[RHS:%[0-9]+]](p0) = COPY %x1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[TST:%[0-9]+]](s1) = G_ICMP intpred(eq), [[LHS]](p0), [[RHS]]
+; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0)
+define void @ptr_comparison(i8* %a, i8* %b, i1* %addr) {
+  %res = icmp eq i8* %a, %b
+  store i1 %res, i1* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_fadd
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FADD [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %s0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %s0
+define float @test_fadd(float %arg1, float %arg2) {
+  %res = fadd float %arg1, %arg2
+  ret float %res
+}
+
+; CHECK-LABEL: name: test_fsub
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FSUB [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %s0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %s0
+define float @test_fsub(float %arg1, float %arg2) {
+  %res = fsub float %arg1, %arg2
+  ret float %res
+}
+
+; CHECK-LABEL: name: test_fmul
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FMUL [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %s0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %s0
+define float @test_fmul(float %arg1, float %arg2) {
+  %res = fmul float %arg1, %arg2
+  ret float %res
+}
+
+; CHECK-LABEL: name: test_fdiv
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FDIV [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %s0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %s0
+define float @test_fdiv(float %arg1, float %arg2) {
+  %res = fdiv float %arg1, %arg2
+  ret float %res
+}
+
+; CHECK-LABEL: name: test_frem
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1
+; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FREM [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %s0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %s0
+define float @test_frem(float %arg1, float %arg2) {
+  %res = frem float %arg1, %arg2
+  ret float %res
+}
+
+; CHECK-LABEL: name: test_sadd_overflow
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SADDO [[LHS]], [[RHS]]
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEQUENCE [[VAL]](s32), 0, [[OVERFLOW]](s1), 32
+; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
+  %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs)
+  store { i32, i1 } %res, { i32, i1 }* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_uadd_overflow
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[ZERO:%[0-9]+]](s1) = G_CONSTANT i1 false
+; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_UADDE [[LHS]], [[RHS]], [[ZERO]]
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEQUENCE [[VAL]](s32), 0, [[OVERFLOW]](s1), 32
+; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
+  %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs)
+  store { i32, i1 } %res, { i32, i1 }* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_ssub_overflow
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SSUBO [[LHS]], [[RHS]]
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEQUENCE [[VAL]](s32), 0, [[OVERFLOW]](s1), 32
+; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
+define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
+  %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs)
+  store { i32, i1 } %res, { i32, i1 }* %subr
+  ret void
+}
+
+; CHECK-LABEL: name: test_usub_overflow
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[ZERO:%[0-9]+]](s1) = G_CONSTANT i1 false
+; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_USUBE [[LHS]], [[RHS]], [[ZERO]]
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEQUENCE [[VAL]](s32), 0, [[OVERFLOW]](s1), 32
+; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
+define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) {
+  %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs)
+  store { i32, i1 } %res, { i32, i1 }* %subr
+  ret void
+}
+
+; CHECK-LABEL: name: test_smul_overflow
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SMULO [[LHS]], [[RHS]]
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEQUENCE [[VAL]](s32), 0, [[OVERFLOW]](s1), 32
+; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
+define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
+  %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs)
+  store { i32, i1 } %res, { i32, i1 }* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_umul_overflow
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_UMULO [[LHS]], [[RHS]]
+; CHECK: [[RES:%[0-9]+]](s64) = G_SEQUENCE [[VAL]](s32), 0, [[OVERFLOW]](s1), 32
+; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0)
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
+define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) {
+  %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)
+  store { i32, i1 } %res, { i32, i1 }* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_extractvalue
+; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD
+; CHECK: [[RES:%[0-9]+]](s32) = G_EXTRACT [[STRUCT]](s128), 64
+; CHECK: %w0 = COPY [[RES]]
+%struct.nested = type {i8, { i8, i32 }, i32}
+define i32 @test_extractvalue(%struct.nested* %addr) {
+  %struct = load %struct.nested, %struct.nested* %addr
+  %res = extractvalue %struct.nested %struct, 1, 1
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_extractvalue_agg
+; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD
+; CHECK: [[RES:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 32
+; CHECK: G_STORE [[RES]]
+define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) {
+  %struct = load %struct.nested, %struct.nested* %addr
+  %res = extractvalue %struct.nested %struct, 1
+  store {i8, i32} %res, {i8, i32}* %addr2
+  ret void
+}
+
+; CHECK-LABEL: name: test_insertvalue
+; CHECK: [[VAL:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD
+; CHECK: [[NEWSTRUCT:%[0-9]+]](s128) = G_INSERT [[STRUCT]](s128), [[VAL]](s32), 64
+; CHECK: G_STORE [[NEWSTRUCT]](s128),
+define void @test_insertvalue(%struct.nested* %addr, i32 %val) {
+  %struct = load %struct.nested, %struct.nested* %addr
+  %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1
+  store %struct.nested %newstruct, %struct.nested* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_insertvalue_agg
+; CHECK: [[SMALLSTRUCT:%[0-9]+]](s64) = G_LOAD
+; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD
+; CHECK: [[RES:%[0-9]+]](s128) = G_INSERT [[STRUCT]](s128), [[SMALLSTRUCT]](s64), 32
+; CHECK: G_STORE [[RES]](s128)
+define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) {
+  %smallstruct = load {i8, i32}, {i8, i32}* %addr2
+  %struct = load %struct.nested, %struct.nested* %addr
+  %res = insertvalue %struct.nested %struct, {i8, i32} %smallstruct, 1
+  store %struct.nested %res, %struct.nested* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_select
+; CHECK: [[TST:%[0-9]+]](s1) = COPY %w0
+; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w2
+; CHECK: [[RES:%[0-9]+]](s32) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]]
+; CHECK: %w0 = COPY [[RES]]
+define i32 @test_select(i1 %tst, i32 %lhs, i32 %rhs) {
+  %res = select i1 %tst, i32 %lhs, i32 %rhs
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: test_select_ptr
+; CHECK: [[TST:%[0-9]+]](s1) = COPY %w0
+; CHECK: [[LHS:%[0-9]+]](p0) = COPY %x1
+; CHECK: [[RHS:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[RES:%[0-9]+]](p0) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]]
+; CHECK: %x0 = COPY [[RES]]
+define i8* @test_select_ptr(i1 %tst, i8* %lhs, i8* %rhs) {
+  %res = select i1 %tst, i8* %lhs, i8* %rhs
+  ret i8* %res
+}
+
+; CHECK-LABEL: name: test_fptosi
+; CHECK: [[FPADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[FP:%[0-9]+]](s32) = G_LOAD [[FPADDR]](p0)
+; CHECK: [[RES:%[0-9]+]](s64) = G_FPTOSI [[FP]](s32)
+; CHECK: %x0 = COPY [[RES]]
+define i64 @test_fptosi(float* %fp.addr) {
+  %fp = load float, float* %fp.addr
+  %res = fptosi float %fp to i64
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: test_fptoui
+; CHECK: [[FPADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[FP:%[0-9]+]](s32) = G_LOAD [[FPADDR]](p0)
+; CHECK: [[RES:%[0-9]+]](s64) = G_FPTOUI [[FP]](s32)
+; CHECK: %x0 = COPY [[RES]]
+define i64 @test_fptoui(float* %fp.addr) {
+  %fp = load float, float* %fp.addr
+  %res = fptoui float %fp to i64
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: test_sitofp
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[FP:%[0-9]+]](s64) = G_SITOFP [[IN]](s32)
+; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0)
+define void @test_sitofp(double* %addr, i32 %in) {
+  %fp = sitofp i32 %in to double
+  store double %fp, double* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_uitofp
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[FP:%[0-9]+]](s64) = G_UITOFP [[IN]](s32)
+; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0)
+define void @test_uitofp(double* %addr, i32 %in) {
+  %fp = uitofp i32 %in to double
+  store double %fp, double* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_fpext
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %s0
+; CHECK: [[RES:%[0-9]+]](s64) = G_FPEXT [[IN]](s32)
+; CHECK: %d0 = COPY [[RES]]
+define double @test_fpext(float %in) {
+  %res = fpext float %in to double
+  ret double %res
+}
+
+; CHECK-LABEL: name: test_fptrunc
+; CHECK: [[IN:%[0-9]+]](s64) = COPY %d0
+; CHECK: [[RES:%[0-9]+]](s32) = G_FPTRUNC [[IN]](s64)
+; CHECK: %s0 = COPY [[RES]]
+define float @test_fptrunc(double %in) {
+  %res = fptrunc double %in to float
+  ret float %res
+}
+
+; CHECK-LABEL: name: test_constant_float
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[TMP:%[0-9]+]](s32) = G_FCONSTANT float 1.500000e+00
+; CHECK: G_STORE [[TMP]](s32), [[ADDR]](p0)
+define void @test_constant_float(float* %addr) {
+  store float 1.5, float* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: float_comparison
+; CHECK: [[LHSADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[RHSADDR:%[0-9]+]](p0) = COPY %x1
+; CHECK: [[BOOLADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[LHS:%[0-9]+]](s32) = G_LOAD [[LHSADDR]](p0)
+; CHECK: [[RHS:%[0-9]+]](s32) = G_LOAD [[RHSADDR]](p0)
+; CHECK: [[TST:%[0-9]+]](s1) = G_FCMP floatpred(oge), [[LHS]](s32), [[RHS]]
+; CHECK: G_STORE [[TST]](s1), [[BOOLADDR]](p0)
+define void @float_comparison(float* %a.addr, float* %b.addr, i1* %bool.addr) {
+  %a = load float, float* %a.addr
+  %b = load float, float* %b.addr
+  %res = fcmp oge float %a, %b
+  store i1 %res, i1* %bool.addr
+  ret void
+}
+
+@var = global i32 0
+
+define i32* @test_global() {
+; CHECK-LABEL: name: test_global
+; CHECK: [[TMP:%[0-9]+]](p0) = G_GLOBAL_VALUE @var{{$}}
+; CHECK: %x0 = COPY [[TMP]](p0)
+
+  ret i32* @var
+}
+
+@var1 = addrspace(42) global i32 0
+define i32 addrspace(42)* @test_global_addrspace() {
+; CHECK-LABEL: name: test_global_addrspace
+; CHECK: [[TMP:%[0-9]+]](p42) = G_GLOBAL_VALUE @var1{{$}}
+; CHECK: %x0 = COPY [[TMP]](p42)
+
+  ret i32 addrspace(42)* @var1
+}
+
+
+define void()* @test_global_func() {
+; CHECK-LABEL: name: test_global_func
+; CHECK: [[TMP:%[0-9]+]](p0) = G_GLOBAL_VALUE @allocai64{{$}}
+; CHECK: %x0 = COPY [[TMP]](p0)
+
+  ret void()* @allocai64
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile)
+define void @test_memcpy(i8* %dst, i8* %src, i64 %size) {
+; CHECK-LABEL: name: test_memcpy
+; CHECK: [[DST:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[SRC:%[0-9]+]](p0) = COPY %x1
+; CHECK: [[SIZE:%[0-9]+]](s64) = COPY %x2
+; CHECK: %x0 = COPY [[DST]]
+; CHECK: %x1 = COPY [[SRC]]
+; CHECK: %x2 = COPY [[SIZE]]
+; CHECK: BL $memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0)
+  ret void
+}
+
+declare i64 @llvm.objectsize.i64(i8*, i1)
+declare i32 @llvm.objectsize.i32(i8*, i1)
+define void @test_objectsize(i8* %addr0, i8* %addr1) {
+; CHECK-LABEL: name: test_objectsize
+; CHECK: [[ADDR0:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[ADDR1:%[0-9]+]](p0) = COPY %x1
+; CHECK: {{%[0-9]+}}(s64) = G_CONSTANT i64 -1
+; CHECK: {{%[0-9]+}}(s64) = G_CONSTANT i64 0
+; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT i32 -1
+; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT i32 0
+  %size64.0 = call i64 @llvm.objectsize.i64(i8* %addr0, i1 0)
+  %size64.intmin = call i64 @llvm.objectsize.i64(i8* %addr0, i1 1)
+  %size32.0 = call i32 @llvm.objectsize.i32(i8* %addr0, i1 0)
+  %size32.intmin = call i32 @llvm.objectsize.i32(i8* %addr0, i1 1)
+  ret void
+}
+
+define void @test_large_const(i128* %addr) {
+; CHECK-LABEL: name: test_large_const
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[VAL:%[0-9]+]](s128) = G_CONSTANT i128 42
+; CHECK: G_STORE [[VAL]](s128), [[ADDR]](p0)
+  store i128 42, i128* %addr
+  ret void
+}
+
+; When there was no formal argument handling (so the first BB was empty) we used
+; to insert the constants at the end of the block, even if they were encountered
+; after the block's terminators had been emitted. Also make sure the order is
+; correct.
+define i8* @test_const_placement() {
+; CHECK-LABEL: name: test_const_placement
+; CHECK: bb.{{[0-9]+}}:
+; CHECK:   [[VAL_INT:%[0-9]+]](s32) = G_CONSTANT i32 42
+; CHECK:   [[VAL:%[0-9]+]](p0) = G_INTTOPTR [[VAL_INT]](s32)
+; CHECK:   G_BR
+  br label %next
+
+next:
+  ret i8* inttoptr(i32 42 to i8*)
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
index f5d85e189d75..4c67c0daaf74 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
@@ -1,11 +1,10 @@
 # RUN: llc -O0 -run-pass=regbankselect -global-isel %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
 # RUN: llc -O0 -run-pass=regbankselect -global-isel %s -regbankselect-greedy -o - 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY
-# REQUIRES: global-isel
 
 --- |
   ; ModuleID = 'generic-virtual-registers-type-error.mir'
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-  target triple = "aarch64-apple-ios"
+  target triple = "aarch64--"
   define void @defaultMapping() {
   entry:
     ret void
@@ -54,22 +53,47 @@
   entry:
     ret void
   }
+
+  define void @ignoreTargetSpecificInst() { ret void }
+
+  define void @regBankSelected_property() { ret void }
+
+  define void @bitcast_s32_gpr() { ret void }
+  define void @bitcast_s32_fpr() { ret void }
+  define void @bitcast_s32_gpr_fpr() { ret void }
+  define void @bitcast_s32_fpr_gpr() { ret void }
+  define void @bitcast_s64_gpr() { ret void }
+  define void @bitcast_s64_fpr() { ret void }
+  define void @bitcast_s64_gpr_fpr() { ret void }
+  define void @bitcast_s64_fpr_gpr() { ret void }
+
+  define i64 @greedyWithChainOfComputation(i64 %arg1, <2 x i32>* %addr) {
+    %varg1 = bitcast i64 %arg1 to <2 x i32>
+    %varg2 = load <2 x i32>, <2 x i32>* %addr
+    %vres = or <2 x i32> %varg1, %varg2
+    %res = bitcast <2 x i32> %vres to i64
+    ret i64 %res
+  }
 ...
 
 ---
 # Check that we assign a relevant register bank for %0.
 # Based on the type i32, this should be gpr.
 name:            defaultMapping
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMapping
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
 registers:
   - { id: 0, class: _ }
+  - { id: 1, class: _ }
 body: |
   bb.0.entry:
     liveins: %x0
-    ; CHECK:      %0(32) = G_ADD i32 %x0
-    %0(32) = G_ADD i32 %x0, %x0
+    ; CHECK:      %1(s32) = G_ADD %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_ADD %0, %0
 ...
 
 ---
@@ -77,16 +101,21 @@ body: |
 # Based on the type <2 x i32>, this should be fpr.
 # FPR is used for both floating point and vector registers.
 name:            defaultMappingVector
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMappingVector
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: fpr }
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
 registers:
   - { id: 0, class: _ }
+  - { id: 1, class: _ }
 body: |
   bb.0.entry:
     liveins: %d0
-    ; CHECK:      %0(32) = G_ADD <2 x i32> %d0
-    %0(32) = G_ADD <2 x i32> %d0, %d0
+    ; CHECK:      %0(<2 x s32>) = COPY %d0
+    ; CHECK:      %1(<2 x s32>) = G_ADD %0
+    %0(<2 x s32>) = COPY %d0
+    %1(<2 x s32>) = G_ADD %0, %0
 ...
 
 ---
@@ -94,27 +123,33 @@ body: |
 # Indeed based on the source of the copy it should live
 # in FPR, but at the use, it should be GPR.
 name:            defaultMapping1Repair
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMapping1Repair
 # CHECK:      registers:
 # CHECK-NEXT:   - { id: 0, class: fpr }
 # CHECK-NEXT:   - { id: 1, class: gpr }
 # CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 3, class: gpr }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
+  - { id: 2, class: _ }
 body: |
   bb.0.entry:
     liveins: %s0, %x0
-    ; CHECK:           %0(32) = COPY %s0
-    ; CHECK-NEXT:      %2(32) = COPY %0
-    ; CHECK-NEXT:      %1(32) = G_ADD i32 %2, %x0
-    %0(32) = COPY %s0
-    %1(32) = G_ADD i32 %0, %x0
+    ; CHECK:           %0(s32) = COPY %s0
+    ; CHECK-NEXT:      %1(s32) = COPY %w0
+    ; CHECK-NEXT:      %3(s32) = COPY %0
+    ; CHECK-NEXT:      %2(s32) = G_ADD %3, %1
+    %0(s32) = COPY %s0
+    %1(s32) = COPY %w0
+    %2(s32) = G_ADD %0, %1
 ...
 
 # Check that we repair the assignment for %0 differently for both uses.
 name:            defaultMapping2Repairs
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMapping2Repairs
 # CHECK:      registers:
 # CHECK-NEXT:   - { id: 0, class: fpr }
 # CHECK-NEXT:   - { id: 1, class: gpr }
@@ -126,12 +161,12 @@ registers:
 body: |
   bb.0.entry:
     liveins: %s0, %x0
-    ; CHECK:           %0(32) = COPY %s0
-    ; CHECK-NEXT:      %2(32) = COPY %0
-    ; CHECK-NEXT:      %3(32) = COPY %0
-    ; CHECK-NEXT:      %1(32) = G_ADD i32 %2, %3
-    %0(32) = COPY %s0
-    %1(32) = G_ADD i32 %0, %0
+    ; CHECK:           %0(s32) = COPY %s0
+    ; CHECK-NEXT:      %2(s32) = COPY %0
+    ; CHECK-NEXT:      %3(s32) = COPY %0
+    ; CHECK-NEXT:      %1(s32) = G_ADD %2, %3
+    %0(s32) = COPY %s0
+    %1(s32) = G_ADD %0, %0
 ...
 
 ---
@@ -140,7 +175,8 @@ body: |
 # requires that it lives in GPR. Make sure regbankselect
 # fixes that.
 name:            defaultMappingDefRepair
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMappingDefRepair
 # CHECK:      registers:
 # CHECK-NEXT:   - { id: 0, class: gpr }
 # CHECK-NEXT:   - { id: 1, class: fpr }
@@ -151,17 +187,17 @@ registers:
 body: |
   bb.0.entry:
     liveins: %w0
-    ; CHECK:           %0(32) = COPY %w0
-    ; CHECK-NEXT:      %2(32) = G_ADD i32 %0, %w0
-    ; CHECK-NEXT:      %1(32) = COPY %2
-    %0(32) = COPY %w0
-    %1(32) = G_ADD i32 %0, %w0
+    ; CHECK:           %0(s32) = COPY %w0
+    ; CHECK-NEXT:      %2(s32) = G_ADD %0, %0
+    ; CHECK-NEXT:      %1(s32) = COPY %2
+    %0(s32) = COPY %w0
+    %1(s32) = G_ADD %0, %0
 ...
 
 ---
 # Check that we are able to propagate register banks from phis.
 name:            phiPropagation
-isSSA:           true
+legalized:       true
 tracksRegLiveness:   true
 # CHECK:      registers:
 # CHECK-NEXT:   - { id: 0, class: gpr32 }
@@ -175,71 +211,82 @@ registers:
   - { id: 2, class: gpr32 }
   - { id: 3, class: _ }
   - { id: 4, class: _ }
+  - { id: 5, class: _ }
 body: |
   bb.0.entry:
     successors: %bb.2.end, %bb.1.then
     liveins: %x0, %x1, %w2
-  
+
     %0 = LDRWui killed %x0, 0 :: (load 4 from %ir.src)
-    %1 = COPY %x1
+    %5(s32) = COPY %0
+    %1(p0) = COPY %x1
     %2 = COPY %w2
     TBNZW killed %2, 0, %bb.2.end
-  
+
   bb.1.then:
     successors: %bb.2.end
-    %3(32) = G_ADD i32 %0, %0
-  
+    %3(s32) = G_ADD %5, %5
+
   bb.2.end:
-    %4(32) = PHI %0, %bb.0.entry, %3, %bb.1.then
-    STRWui killed %4, killed %1, 0 :: (store 4 into %ir.dst)
+    %4(s32) = PHI %0, %bb.0.entry, %3, %bb.1.then
+    G_STORE killed %4, killed %1 :: (store 4 into %ir.dst)
     RET_ReallyLR
 ...
 
 ---
 # Make sure we can repair physical register uses as well.
 name:            defaultMappingUseRepairPhysReg
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMappingUseRepairPhysReg
 # CHECK:      registers:
 # CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
+# CHECK-NEXT:   - { id: 1, class: fpr }
 # CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 3, class: gpr }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
+  - { id: 2, class: _ }
 body: |
   bb.0.entry:
     liveins: %w0, %s0
-    ; CHECK:           %0(32) = COPY %w0
-    ; CHECK-NEXT:      %2(32) = COPY %s0
-    ; CHECK-NEXT:      %1(32) = G_ADD i32 %0, %2
-    %0(32) = COPY %w0
-    %1(32) = G_ADD i32 %0, %s0
+    ; CHECK:           %0(s32) = COPY %w0
+    ; CHECK-NEXT:      %1(s32) = COPY %s0
+    ; CHECK-NEXT:      %3(s32) = COPY %1
+    ; CHECK-NEXT:      %2(s32) = G_ADD %0, %3
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %s0
+    %2(s32) = G_ADD %0, %1
 ...
 
 ---
 # Make sure we can repair physical register defs.
 name:            defaultMappingDefRepairPhysReg
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: defaultMappingDefRepairPhysReg
 # CHECK:      registers:
 # CHECK-NEXT:   - { id: 0, class: gpr }
 # CHECK-NEXT:   - { id: 1, class: gpr }
 registers:
   - { id: 0, class: _ }
+  - { id: 1, class: _ }
 body: |
   bb.0.entry:
     liveins: %w0
-    ; CHECK:           %0(32) = COPY %w0
-    ; CHECK-NEXT:      %1(32) = G_ADD i32 %0, %0
+    ; CHECK:           %0(s32) = COPY %w0
+    ; CHECK-NEXT:      %1(s32) = G_ADD %0, %0
     ; CHECK-NEXT:      %s0 = COPY %1
-    %0(32) = COPY %w0
-    %s0 = G_ADD i32 %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_ADD %0, %0
+    %s0 = COPY %1
 ...
 
 ---
 # Check that the greedy mode is able to switch the
 # G_OR instruction from fpr to gpr.
 name:            greedyMappingOr
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: greedyMappingOr
 # CHECK:      registers:
 # CHECK-NEXT:  - { id: 0, class: gpr }
 # CHECK-NEXT:  - { id: 1, class: gpr }
@@ -261,23 +308,23 @@ registers:
 body: |
   bb.0.entry:
     liveins: %x0, %x1
-    ; CHECK: %0(64) = COPY %x0
-    ; CHECK-NEXT: %1(64) = COPY %x1
+    ; CHECK: %0(<2 x s32>) = COPY %x0
+    ; CHECK-NEXT: %1(<2 x s32>) = COPY %x1
 
 
     ; Fast mode tries to reuse the source of the copy for the destination.
     ; Now, the default mapping says that %0 and %1 need to be in FPR.
     ; The repairing code insert two copies to materialize that.
-    ; FAST-NEXT: %3(64) = COPY %0
-    ; FAST-NEXT: %4(64) = COPY %1
+    ; FAST-NEXT: %3(s64) = COPY %0
+    ; FAST-NEXT: %4(s64) = COPY %1
     ; The mapping of G_OR is on FPR.
-    ; FAST-NEXT: %2(64) = G_OR <2 x i32> %3, %4
+    ; FAST-NEXT: %2(<2 x s32>) = G_OR %3, %4
 
     ; Greedy mode remapped the instruction on the GPR bank.
-    ; GREEDY-NEXT: %2(64) = G_OR <2 x i32> %0, %1
-    %0(64) = COPY %x0
-    %1(64) = COPY %x1
-    %2(64) = G_OR <2 x i32> %0, %1
+    ; GREEDY-NEXT: %2(<2 x s32>) = G_OR %0, %1
+    %0(<2 x s32>) = COPY %x0
+    %1(<2 x s32>) = COPY %x1
+    %2(<2 x s32>) = G_OR %0, %1
 ...
 
 ---
@@ -285,7 +332,8 @@ body: |
 # G_OR instruction from fpr to gpr, while still honoring
 # %2 constraint.
 name:            greedyMappingOrWithConstraints
-isSSA:           true
+legalized:       true
+# CHECK-LABEL: name: greedyMappingOrWithConstraints
 # CHECK:      registers:
 # CHECK-NEXT:  - { id: 0, class: gpr }
 # CHECK-NEXT:  - { id: 1, class: gpr }
@@ -307,23 +355,298 @@ registers:
 body: |
   bb.0.entry:
     liveins: %x0, %x1
-    ; CHECK: %0(64) = COPY %x0
-    ; CHECK-NEXT: %1(64) = COPY %x1
+    ; CHECK: %0(<2 x s32>) = COPY %x0
+    ; CHECK-NEXT: %1(<2 x s32>) = COPY %x1
 
 
     ; Fast mode tries to reuse the source of the copy for the destination.
     ; Now, the default mapping says that %0 and %1 need to be in FPR.
     ; The repairing code insert two copies to materialize that.
-    ; FAST-NEXT: %3(64) = COPY %0
-    ; FAST-NEXT: %4(64) = COPY %1
+    ; FAST-NEXT: %3(s64) = COPY %0
+    ; FAST-NEXT: %4(s64) = COPY %1
     ; The mapping of G_OR is on FPR.
-    ; FAST-NEXT: %2(64) = G_OR <2 x i32> %3, %4
+    ; FAST-NEXT: %2(<2 x s32>) = G_OR %3, %4
 
     ; Greedy mode remapped the instruction on the GPR bank.
-    ; GREEDY-NEXT: %3(64) = G_OR <2 x i32> %0, %1
+    ; GREEDY-NEXT: %3(s64) = G_OR %0, %1
     ; We need to keep %2 into FPR because we do not know anything about it.
-    ; GREEDY-NEXT: %2(64) = COPY %3
-    %0(64) = COPY %x0
-    %1(64) = COPY %x1
-    %2(64) = G_OR <2 x i32> %0, %1
+    ; GREEDY-NEXT: %2(<2 x s32>) = COPY %3
+    %0(<2 x s32>) = COPY %x0
+    %1(<2 x s32>) = COPY %x1
+    %2(<2 x s32>) = G_OR %0, %1
+...
+
+---
+# CHECK-LABEL: name: ignoreTargetSpecificInst
+name:            ignoreTargetSpecificInst
+legalized:       true
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr64 }
+# CHECK-NEXT:  - { id: 1, class: gpr64 }
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+body: |
+  bb.0:
+    liveins: %x0
+
+    ; CHECK: %0 = COPY %x0
+    ; CHECK-NEXT: %1 = ADDXrr %0, %0
+    ; CHECK-NEXT: %x0 = COPY %1
+    ; CHECK-NEXT: RET_ReallyLR implicit %x0
+
+    %0 = COPY %x0
+    %1 = ADDXrr %0, %0
+    %x0 = COPY %1
+    RET_ReallyLR implicit %x0
+...
+
+---
+# Check that we set the "regBankSelected" property.
+# CHECK-LABEL: name: regBankSelected_property
+# CHECK: legalized: true
+# CHECK: regBankSelected: true
+name:            regBankSelected_property
+legalized:       true
+regBankSelected: false
+body:             |
+  bb.0:
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_gpr
+name:            bitcast_s32_gpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr }
+# CHECK-NEXT:  - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(s32) = COPY %w0
+# CHECK:    %1(s32) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(s32) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_fpr
+name:            bitcast_s32_fpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr }
+# CHECK-NEXT:  - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(<2 x s16>) = COPY %s0
+# CHECK:    %1(<2 x s16>) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(<2 x s16>) = COPY %s0
+    %1(<2 x s16>) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_gpr_fpr
+name:            bitcast_s32_gpr_fpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr }
+# FAST-NEXT:  - { id: 1, class: fpr }
+# GREEDY-NEXT:  - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(s32) = COPY %w0
+# CHECK:    %1(<2 x s16>) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %w0
+
+    %0(s32) = COPY %w0
+    %1(<2 x s16>) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s32_fpr_gpr
+name:            bitcast_s32_fpr_gpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr }
+# FAST-NEXT:  - { id: 1, class: gpr }
+# GREEDY-NEXT:  - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(<2 x s16>) = COPY %s0
+# CHECK:    %1(s32) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %s0
+
+    %0(<2 x s16>) = COPY %s0
+    %1(s32) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_gpr
+name:            bitcast_s64_gpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr }
+# CHECK-NEXT:  - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(s64) = COPY %x0
+# CHECK:    %1(s64) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(s64) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_fpr
+name:            bitcast_s64_fpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr }
+# CHECK-NEXT:  - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(<2 x s32>) = COPY %d0
+# CHECK:    %1(<2 x s32>) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(<2 x s32>) = COPY %d0
+    %1(<2 x s32>) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_gpr_fpr
+name:            bitcast_s64_gpr_fpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: gpr }
+# FAST-NEXT:  - { id: 1, class: fpr }
+# GREEDY-NEXT:  - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:  body:
+# CHECK:    %0(s64) = COPY %x0
+# CHECK:    %1(<2 x s32>) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %x0
+
+    %0(s64) = COPY %x0
+    %1(<2 x s32>) = G_BITCAST %0
+...
+
+---
+# CHECK-LABEL: name: bitcast_s64_fpr_gpr
+name:            bitcast_s64_fpr_gpr
+legalized:       true
+
+# CHECK:      registers:
+# CHECK-NEXT:  - { id: 0, class: fpr }
+# FAST-NEXT:  - { id: 1, class: gpr }
+# GREEDY-NEXT:  - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+
+# CHECK:  body:
+# CHECK:    %0(<2 x s32>) = COPY %d0
+# CHECK:    %1(s64) = G_BITCAST %0
+body:             |
+  bb.0:
+    liveins: %d0
+
+    %0(<2 x s32>) = COPY %d0
+    %1(s64) = G_BITCAST %0
+...
+
+---
+# Make sure the greedy mode is able to take advantage of the
+# alternative mappings of G_LOAD to coalesce the whole chain
+# of computation on GPR.
+# CHECK-LABEL: name: greedyWithChainOfComputation
+name:            greedyWithChainOfComputation
+legalized:       true
+
+# CHECK: registers:
+# CHECK-NEXT:  - { id: 0, class: gpr }
+# CHECK-NEXT:  - { id: 1, class: gpr }
+# FAST-NEXT:   - { id: 2, class: fpr }
+# FAST-NEXT:   - { id: 3, class: fpr }
+# FAST-NEXT:   - { id: 4, class: fpr }
+# GREEDY-NEXT: - { id: 2, class: gpr }
+# GREEDY-NEXT: - { id: 3, class: gpr }
+# GREEDY-NEXT: - { id: 4, class: gpr }
+# CHECK-NEXT:  - { id: 5, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+
+# No repairing should be necessary for both modes.
+# CHECK:         %0(s64) = COPY %x0
+# CHECK-NEXT:    %1(p0) = COPY %x1
+# CHECK-NEXT:    %2(<2 x s32>) = G_BITCAST %0(s64)
+# CHECK-NEXT:    %3(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr)
+# CHECK-NEXT:    %4(<2 x s32>) = G_OR %2, %3
+# CHECK-NEXT:    %5(s64) = G_BITCAST %4(<2 x s32>)
+# CHECK-NEXT:    %x0 = COPY %5(s64)
+# CHECK-NEXT:    RET_ReallyLR implicit %x0
+
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(p0) = COPY %x1
+    %2(<2 x s32>) = G_BITCAST %0(s64)
+    %3(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr)
+    %4(<2 x s32>) = G_OR %2, %3
+    %5(s64) = G_BITCAST %4(<2 x s32>)
+    %x0 = COPY %5(s64)
+    RET_ReallyLR implicit %x0
+
 ...
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
new file mode 100644
index 000000000000..4e6b9cad4c3d
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-apple-ios -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+
+; CHECK-LABEL: name: test_stack_slots
+; CHECK: fixedStack:
+; CHECK-DAG:  - { id: [[STACK0:[0-9]+]], offset: 0, size: 1
+; CHECK-DAG:  - { id: [[STACK1:[0-9]+]], offset: 1, size: 1
+; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
+; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK1]]
+; CHECK: [[RHS:%[0-9]+]](s8) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK1]], align 0)
+; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[LHS]], [[RHS]]
+; CHECK: [[SUM32:%[0-9]+]](s32) = G_SEXT [[SUM]](s8)
+; CHECK: %w0 = COPY [[SUM32]](s32)
+define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) {
+  %sum = add i8 %lhs, %rhs
+  ret i8 %sum
+}
+
+; CHECK-LABEL: name: test_call_stack
+; CHECK: [[C42:%[0-9]+]](s8) = G_CONSTANT i8 42
+; CHECK: [[C12:%[0-9]+]](s8) = G_CONSTANT i8 12
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 0
+; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64)
+; CHECK: G_STORE [[C42]](s8), [[C42_LOC]](p0) :: (store 1 into stack, align 0)
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[C12_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 1
+; CHECK: [[C12_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C12_OFFS]](s64)
+; CHECK: G_STORE [[C12]](s8), [[C12_LOC]](p0) :: (store 1 into stack + 1, align 0)
+; CHECK: BL @test_stack_slots
+define void @test_call_stack() {
+  call signext i8 @test_stack_slots([8 x i64] undef, i8 signext 42, i8 signext 12)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
new file mode 100644
index 000000000000..7bedad38de1a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -0,0 +1,196 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+; CHECK-LABEL: name: test_trivial_call
+; CHECK: BL @trivial_callee, csr_aarch64_aapcs, implicit-def %lr
+declare void @trivial_callee()
+define void @test_trivial_call() {
+  call void @trivial_callee()
+  ret void
+}
+
+; CHECK-LABEL: name: test_simple_return
+; CHECK: BL @simple_return_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit-def %x0
+; CHECK: [[RES:%[0-9]+]](s64) = COPY %x0
+; CHECK: %x0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit %x0
+declare i64 @simple_return_callee()
+define i64 @test_simple_return() {
+  %res = call i64 @simple_return_callee()
+  ret i64 %res
+}
+
+; CHECK-LABEL: name: test_simple_arg
+; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0
+; CHECK: %w0 = COPY [[IN]]
+; CHECK: BL @simple_arg_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0
+; CHECK: RET_ReallyLR
+declare void @simple_arg_callee(i32 %in)
+define void @test_simple_arg(i32 %in) {
+  call void @simple_arg_callee(i32 %in)
+  ret void
+}
+
+; CHECK-LABEL: name: test_indirect_call
+; CHECK: registers:
+; Make sure the register feeding the indirect call is properly constrained.
+; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64 }
+; CHECK: %[[FUNC]](p0) = COPY %x0
+; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp
+; CHECK: RET_ReallyLR
+define void @test_indirect_call(void()* %func) {
+  call void %func()
+  ret void
+}
+
+; CHECK-LABEL: name: test_multiple_args
+; CHECK: [[IN:%[0-9]+]](s64) = COPY %x0
+; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42
+; CHECK: %w0 = COPY [[ANSWER]]
+; CHECK: %x1 = COPY [[IN]]
+; CHECK: BL @multiple_args_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit %x1
+; CHECK: RET_ReallyLR
+declare void @multiple_args_callee(i32, i64)
+define void @test_multiple_args(i64 %in) {
+  call void @multiple_args_callee(i32 42, i64 %in)
+  ret void
+}
+
+
+; CHECK-LABEL: name: test_struct_formal
+; CHECK: [[DBL:%[0-9]+]](s64) = COPY %d0
+; CHECK: [[I64:%[0-9]+]](s64) = COPY %x0
+; CHECK: [[I8:%[0-9]+]](s8) = COPY %w1
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2
+; CHECK: [[ARG:%[0-9]+]](s192) = G_SEQUENCE [[DBL]](s64), 0, [[I64]](s64), 64, [[I8]](s8), 128
+; CHECK: G_STORE [[ARG]](s192), [[ADDR]](p0)
+; CHECK: RET_ReallyLR
+define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) {
+  store {double, i64, i8} %in, {double, i64, i8}* %addr
+  ret void
+}
+
+
+; CHECK-LABEL: name: test_struct_return
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[VAL:%[0-9]+]](s192) = G_LOAD [[ADDR]](p0)
+; CHECK: [[DBL:%[0-9]+]](s64), [[I64:%[0-9]+]](s64), [[I32:%[0-9]+]](s32) = G_EXTRACT [[VAL]](s192), 0, 64, 128
+; CHECK: %d0 = COPY [[DBL]](s64)
+; CHECK: %x0 = COPY [[I64]](s64)
+; CHECK: %w1 = COPY [[I32]](s32)
+; CHECK: RET_ReallyLR implicit %d0, implicit %x0, implicit %w1
+define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) {
+  %val = load {double, i64, i32}, {double, i64, i32}* %addr
+  ret {double, i64, i32} %val
+}
+
+; CHECK-LABEL: name: test_arr_call
+; CHECK: [[ARG:%[0-9]+]](s256) = G_LOAD
+; CHECK: [[E0:%[0-9]+]](s64), [[E1:%[0-9]+]](s64), [[E2:%[0-9]+]](s64), [[E3:%[0-9]+]](s64) = G_EXTRACT [[ARG]](s256), 0, 64, 128, 192
+; CHECK: %x0 = COPY [[E0]](s64)
+; CHECK: %x1 = COPY [[E1]](s64)
+; CHECK: %x2 = COPY [[E2]](s64)
+; CHECK: %x3 = COPY [[E3]](s64)
+; CHECK: BL @arr_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2, implicit %x3, implicit-def %x0, implicit-def %x1, implicit-def %x2, implicit-def %x3
+; CHECK: [[E0:%[0-9]+]](s64) = COPY %x0
+; CHECK: [[E1:%[0-9]+]](s64) = COPY %x1
+; CHECK: [[E2:%[0-9]+]](s64) = COPY %x2
+; CHECK: [[E3:%[0-9]+]](s64) = COPY %x3
+; CHECK: [[RES:%[0-9]+]](s256) = G_SEQUENCE [[E0]](s64), 0, [[E1]](s64), 64, [[E2]](s64), 128, [[E3]](s64), 192
+; CHECK: G_EXTRACT [[RES]](s256), 64
+declare [4 x i64] @arr_callee([4 x i64])
+define i64 @test_arr_call([4 x i64]* %addr) {
+  %arg = load [4 x i64], [4 x i64]* %addr
+  %res = call [4 x i64] @arr_callee([4 x i64] %arg)
+  %val = extractvalue [4 x i64] %res, 1
+  ret i64 %val
+}
+
+
+; CHECK-LABEL: name: test_abi_exts_call
+; CHECK: [[VAL:%[0-9]+]](s8) = G_LOAD
+; CHECK: %w0 = COPY [[VAL]]
+; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0
+; CHECK: [[SVAL:%[0-9]+]](s32) = G_SEXT [[VAL]](s8)
+; CHECK: %w0 = COPY [[SVAL]](s32)
+; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0
+; CHECK: [[ZVAL:%[0-9]+]](s32) = G_ZEXT [[VAL]](s8)
+; CHECK: %w0 = COPY [[ZVAL]](s32)
+; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0
+declare void @take_char(i8)
+define void @test_abi_exts_call(i8* %addr) {
+  %val = load i8, i8* %addr
+  call void @take_char(i8 %val)
+  call void @take_char(i8 signext %val)
+  call void @take_char(i8 zeroext %val)
+  ret void
+}
+
+; CHECK-LABEL: name: test_abi_sext_ret
+; CHECK: [[VAL:%[0-9]+]](s8) = G_LOAD
+; CHECK: [[SVAL:%[0-9]+]](s32) = G_SEXT [[VAL]](s8)
+; CHECK: %w0 = COPY [[SVAL]](s32)
+; CHECK: RET_ReallyLR implicit %w0
+define signext i8 @test_abi_sext_ret(i8* %addr) {
+  %val = load i8, i8* %addr
+  ret i8 %val
+}
+
+; CHECK-LABEL: name: test_abi_zext_ret
+; CHECK: [[VAL:%[0-9]+]](s8) = G_LOAD
+; CHECK: [[ZVAL:%[0-9]+]](s32) = G_ZEXT [[VAL]](s8)
+; CHECK: %w0 = COPY [[ZVAL]](s32)
+; CHECK: RET_ReallyLR implicit %w0
+define zeroext i8 @test_abi_zext_ret(i8* %addr) {
+  %val = load i8, i8* %addr
+  ret i8 %val
+}
+
+; CHECK-LABEL: name: test_stack_slots
+; CHECK: fixedStack:
+; CHECK-DAG:  - { id: [[STACK0:[0-9]+]], offset: 0, size: 8
+; CHECK-DAG:  - { id: [[STACK8:[0-9]+]], offset: 8, size: 8
+; CHECK-DAG:  - { id: [[STACK16:[0-9]+]], offset: 16, size: 8
+; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
+; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
+; CHECK: [[RHS:%[0-9]+]](s64) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
+; CHECK: [[ADDR_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]]
+; CHECK: [[ADDR:%[0-9]+]](p0) = G_LOAD [[ADDR_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK16]], align 0)
+; CHECK: [[SUM:%[0-9]+]](s64) = G_ADD [[LHS]], [[RHS]]
+; CHECK: G_STORE [[SUM]](s64), [[ADDR]](p0)
+define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) {
+  %sum = add i64 %lhs, %rhs
+  store i64 %sum, i64* %addr
+  ret void
+}
+
+; CHECK-LABEL: name: test_call_stack
+; CHECK: [[C42:%[0-9]+]](s64) = G_CONSTANT i64 42
+; CHECK: [[C12:%[0-9]+]](s64) = G_CONSTANT i64 12
+; CHECK: [[PTR:%[0-9]+]](p0) = G_CONSTANT i64 0
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 0
+; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64)
+; CHECK: G_STORE [[C42]](s64), [[C42_LOC]](p0) :: (store 8 into stack, align 0)
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[C12_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 8
+; CHECK: [[C12_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C12_OFFS]](s64)
+; CHECK: G_STORE [[C12]](s64), [[C12_LOC]](p0) :: (store 8 into stack + 8, align 0)
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[PTR_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 16
+; CHECK: [[PTR_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[PTR_OFFS]](s64)
+; CHECK: G_STORE [[PTR]](p0), [[PTR_LOC]](p0) :: (store 8 into stack + 16, align 0)
+; CHECK: BL @test_stack_slots
+define void @test_call_stack() {
+  call void @test_stack_slots([8 x i64] undef, i64 42, i64 12, i64* null)
+  ret void
+}
+
+; CHECK-LABEL: name: test_mem_i1
+; CHECK: fixedStack:
+; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+; CHECK: [[ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]]
+; CHECK: {{%[0-9]+}}(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0)
+define void @test_mem_i1([8 x i64], i1 %in) {
+  ret void
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/gisel-abort.ll b/test/CodeGen/AArch64/GlobalISel/gisel-abort.ll
new file mode 100644
index 000000000000..76eafdd5af5e
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/gisel-abort.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+; CHECK-NOT: fallback
+; CHECK: empty
+define void @empty() {
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
new file mode 100644
index 000000000000..9051b2388fce
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
@@ -0,0 +1,44 @@
+; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
+
+@_ZTIi = external global i8*
+
+declare i32 @foo(i32)
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(i8*)
+
+; CHECK: name: bar
+; CHECK: body:
+; CHECK-NEXT:   bb.1:
+; CHECK:     successors: %[[GOOD:bb.[0-9]+]]{{.*}}%[[BAD:bb.[0-9]+]]
+; CHECK:     EH_LABEL
+; CHECK:     %w0 = COPY
+; CHECK:     BL @foo, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit-def %w0
+; CHECK:     {{%[0-9]+}}(s32) = COPY %w0
+; CHECK:     EH_LABEL
+
+; CHECK:   [[BAD]] (landing-pad):
+; CHECK:     EH_LABEL
+; CHECK:     [[PTR:%[0-9]+]](p0) = COPY %x0
+; CHECK:     [[SEL:%[0-9]+]](p0) = COPY %x1
+; CHECK:     [[PTR_SEL:%[0-9]+]](s128) = G_SEQUENCE [[PTR]](p0), 0, [[SEL]](p0), 64
+; CHECK:     [[PTR_RET:%[0-9]+]](s64), [[SEL_RET:%[0-9]+]](s32) = G_EXTRACT [[PTR_SEL]](s128), 0, 64
+; CHECK:     %x0 = COPY [[PTR_RET]]
+; CHECK:     %w1 = COPY [[SEL_RET]]
+
+; CHECK:   [[GOOD]]:
+; CHECK:     [[SEL:%[0-9]+]](s32) = G_CONSTANT i32 1
+; CHECK:     {{%[0-9]+}}(s128) = G_INSERT {{%[0-9]+}}(s128), [[SEL]](s32), 64
+
+define { i8*, i32 } @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  %res32 = invoke i32 @foo(i32 42) to label %continue unwind label %broken
+
+
+broken:
+  %ptr.sel = landingpad { i8*, i32 } catch i8* bitcast(i8** @_ZTIi to i8*)
+  ret { i8*, i32 } %ptr.sel
+
+continue:
+  %sel.int = tail call i32 @llvm.eh.typeid.for(i8* bitcast(i8** @_ZTIi to i8*))
+  %res.good = insertvalue { i8*, i32 } undef, i32 %sel.int, 1
+  ret { i8*, i32 } %res.good
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
new file mode 100644
index 000000000000..252e60c6b2ec
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -0,0 +1,118 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_scalar_add_big() {
+  entry:
+    ret void
+  }
+  define void @test_scalar_add_small() {
+  entry:
+    ret void
+  }
+  define void @test_vector_add() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_scalar_add_big
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_add_big
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK-DAG: [[CARRY0_32:%.*]](s32) = G_CONSTANT i32 0
+    ; CHECK-DAG: [[CARRY0:%[0-9]+]](s1) = G_TRUNC [[CARRY0_32]]
+    ; CHECK: [[RES_LO:%.*]](s64), [[CARRY:%.*]](s1) = G_UADDE %0, %2, [[CARRY0]]
+    ; CHECK: [[RES_HI:%.*]](s64), {{%.*}}(s1) = G_UADDE %1, %3, [[CARRY]]
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK: %x0 = COPY [[RES_LO]]
+    ; CHECK: %x1 = COPY [[RES_HI]]
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = COPY %x2
+    %3(s64) = COPY %x3
+    %4(s128) = G_SEQUENCE %0, 0, %1, 64
+    %5(s128) = G_SEQUENCE %2, 0, %3, 64
+    %6(s128) = G_ADD %4, %5
+    %7(s64), %8(s64) = G_EXTRACT %6, 0, 64
+    %x0 = COPY %7
+    %x1 = COPY %8
+...
+
+---
+name:            test_scalar_add_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_add_small
+    ; CHECK: [[RES:%.*]](s8) = G_ADD %2, %3
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0
+    %3(s8) = G_TRUNC %1
+    %4(s8) = G_ADD %2, %3
+    %5(s64) = G_ANYEXT %4
+    %x0 = COPY %5
+...
+
+---
+name:            test_vector_add
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %q0, %q1, %q2, %q3
+    ; CHECK-LABEL: name: test_vector_add
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK: [[RES_LO:%.*]](<2 x s64>) = G_ADD %0, %2
+    ; CHECK: [[RES_HI:%.*]](<2 x s64>) = G_ADD %1, %3
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK: %q0 = COPY [[RES_LO]]
+    ; CHECK: %q1 = COPY [[RES_HI]]
+
+    %0(<2 x s64>) = COPY %q0
+    %1(<2 x s64>) = COPY %q1
+    %2(<2 x s64>) = COPY %q2
+    %3(<2 x s64>) = COPY %q3
+    %4(<4 x s64>) = G_SEQUENCE %0, 0, %1, 128
+    %5(<4 x s64>) = G_SEQUENCE %2, 0, %3, 128
+    %6(<4 x s64>) = G_ADD %4, %5
+    %7(<2 x s64>), %8(<2 x s64>) = G_EXTRACT %6, 0, 128
+    %q0 = COPY %7
+    %q1 = COPY %8
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
new file mode 100644
index 000000000000..69459bfacb0a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
@@ -0,0 +1,34 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_scalar_and_small() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_scalar_and_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_and_small
+    ; CHECK: %4(s8) = G_AND %2, %3
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0
+    %3(s8) = G_TRUNC %1
+    %4(s8) = G_AND %2, %3
+    %5(s64) = G_ANYEXT %4
+    %x0 = COPY %5
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
new file mode 100644
index 000000000000..926a62761ce0
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -0,0 +1,45 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_icmp() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_icmp
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+  - { id: 9, class: _ }
+  - { id: 10, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x0
+
+    %2(s8) = G_TRUNC %0
+    %3(s8) = G_TRUNC %1
+
+    ; CHECK: %4(s1) = G_ICMP intpred(sge), %0(s64), %1
+    %4(s1) = G_ICMP intpred(sge), %0, %1
+
+    ; CHECK: [[LHS32:%[0-9]+]](s32) = G_ZEXT %2
+    ; CHECK: [[RHS32:%[0-9]+]](s32) = G_ZEXT %3
+    ; CHECK: %8(s1) = G_ICMP intpred(ult), [[LHS32]](s32), [[RHS32]]
+    %8(s1) = G_ICMP intpred(ult), %2, %3
+
+    %9(p0) = G_INTTOPTR %0(s64)
+    %10(s1) = G_ICMP intpred(eq), %9(p0), %9(p0)
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
new file mode 100644
index 000000000000..cc1dc80488ba
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
@@ -0,0 +1,92 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_combines() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_combines
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+  - { id: 9, class: _ }
+  - { id: 10, class: _ }
+  - { id: 11, class: _ }
+  - { id: 12, class: _ }
+  - { id: 13, class: _ }
+  - { id: 14, class: _ }
+  - { id: 15, class: _ }
+  - { id: 16, class: _ }
+  - { id: 17, class: _ }
+  - { id: 18, class: _ }
+  - { id: 19, class: _ }
+  - { id: 20, class: _ }
+  - { id: 21, class: _ }
+  - { id: 22, class: _ }
+  - { id: 23, class: _ }
+  - { id: 24, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %w0, %w1, %x2, %x3
+
+    %0(s32) = COPY %w0
+    %1(s32) = COPY %w1
+    %2(s8) = G_TRUNC %0
+
+      ; Only one of these extracts can be eliminated; the offsets don't match
+      ; properly in the other cases.
+    ; CHECK-LABEL: name: test_combines
+    ; CHECK: %3(s32) = G_SEQUENCE %2(s8), 1
+    ; CHECK: %4(s8) = G_EXTRACT %3(s32), 0
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %6(s8) = G_EXTRACT %3(s32), 2
+    ; CHECK: %7(s32) = G_ZEXT %2(s8)
+    %3(s32) = G_SEQUENCE %2, 1
+    %4(s8) = G_EXTRACT %3, 0
+    %5(s8) = G_EXTRACT %3, 1
+    %6(s8) = G_EXTRACT %3, 2
+    %7(s32) = G_ZEXT %5
+
+      ; Similarly, here the types don't match.
+    ; CHECK: %10(s32) = G_SEQUENCE %8(s16), 0, %9(s16), 16
+    ; CHECK: %11(s1) = G_EXTRACT %10(s32), 0
+    ; CHECK: %12(s32) = G_EXTRACT %10(s32), 0
+    %8(s16) = G_TRUNC %0
+    %9(s16) = G_ADD %8, %8
+    %10(s32) = G_SEQUENCE %8, 0, %9, 16
+    %11(s1) = G_EXTRACT %10, 0
+    %12(s32) = G_EXTRACT %10, 0
+
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %15(s16) = G_ADD %8, %9
+    %13(s16), %14(s16) = G_EXTRACT %10, 0, 16
+    %15(s16) = G_ADD %13, %14
+
+    ; CHECK: %18(<2 x s32>) = G_EXTRACT %17(s128), 0
+    ; CHECK: %19(<2 x s32>) = G_ADD %18, %18
+    %16(s64) = COPY %x0
+    %17(s128) = G_SEQUENCE %16, 0, %16, 64
+    %18(<2 x s32>) = G_EXTRACT %17, 0
+    %19(<2 x s32>) = G_ADD %18, %18
+
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %24(s32) = G_ADD %0, %20
+    %20(s32) = G_ADD %0, %0
+    %21(s64) = G_SEQUENCE %0, 0, %20, 32
+    %22(s32) = G_EXTRACT %21, 0
+    %23(s32) = G_EXTRACT %21, 32
+    %24(s32) = G_ADD %22, %23
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
new file mode 100644
index 000000000000..56a7d4736ae8
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -0,0 +1,77 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_constant() {
+  entry:
+    ret void
+  }
+  define void @test_fconstant() {
+  entry:
+    ret void
+  }
+  @var = global i8 0
+  define i8* @test_global() { ret i8* undef }
+...
+
+---
+name:            test_constant
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_constant
+    ; CHECK: [[TMP:%[0-9]+]](s32) = G_CONSTANT i32 0
+    ; CHECK: %0(s1) = G_TRUNC [[TMP]]
+    ; CHECK: [[TMP:%[0-9]+]](s32) = G_CONSTANT i32 42
+    ; CHECK: %1(s8) = G_TRUNC [[TMP]]
+    ; CHECK: [[TMP:%[0-9]+]](s32) = G_CONSTANT i32 -1
+    ; CHECK: %2(s16) = G_TRUNC [[TMP]]
+    ; CHECK: %3(s32) = G_CONSTANT i32 -1
+    ; CHECK: %4(s64) = G_CONSTANT i64 1
+    ; CHECK: %5(s64) = G_CONSTANT i64 0
+
+    %0(s1) = G_CONSTANT i1 0
+    %1(s8) = G_CONSTANT i8 42
+    %2(s16) = G_CONSTANT i16 65535
+    %3(s32) = G_CONSTANT i32 -1
+    %4(s64) = G_CONSTANT i64 1
+    %5(s64) = G_CONSTANT i64 0
+...
+
+---
+name:            test_fconstant
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_fconstant
+    ; CHECK: %0(s32) = G_FCONSTANT  float 1.000000e+00
+    ; CHECK: %1(s64) = G_FCONSTANT  double 2.000000e+00
+    ; CHECK: [[TMP:%[0-9]+]](s32) = G_FCONSTANT half 0xH0000
+    ; CHECK: %2(s16) = G_FPTRUNC [[TMP]]
+
+    %0(s32) = G_FCONSTANT float 1.0
+    %1(s64) = G_FCONSTANT double 2.0
+    %2(s16) = G_FCONSTANT half 0.0
+...
+
+---
+name:            test_global
+registers:
+  - { id: 0, class: _ }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_global
+    ; CHECK: %0(p0) = G_GLOBAL_VALUE @var
+
+    %0(p0) = G_GLOBAL_VALUE @var
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-div.mir b/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
new file mode 100644
index 000000000000..aaef45d3c928
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
@@ -0,0 +1,42 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_div() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_div
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0
+    %3(s8) = G_TRUNC %1
+
+
+    ; CHECK: [[LHS32:%[0-9]+]](s32) = G_SEXT %2
+    ; CHECK: [[RHS32:%[0-9]+]](s32) = G_SEXT %3
+    ; CHECK: [[QUOT32:%[0-9]+]](s32) = G_SDIV [[LHS32]], [[RHS32]]
+    ; CHECK: [[RES:%[0-9]+]](s8) = G_TRUNC [[QUOT32]]
+    %4(s8) = G_SDIV %2, %3
+
+    ; CHECK: [[LHS32:%[0-9]+]](s32) = G_ZEXT %2
+    ; CHECK: [[RHS32:%[0-9]+]](s32) = G_ZEXT %3
+    ; CHECK: [[QUOT32:%[0-9]+]](s32) = G_UDIV [[LHS32]], [[RHS32]]
+    ; CHECK: [[RES:%[0-9]+]](s8) = G_TRUNC [[QUOT32]]
+    %5(s8) = G_UDIV %2, %3
+
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir
new file mode 100644
index 000000000000..9907f009d931
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir
@@ -0,0 +1,79 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_ext() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_ext
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+  - { id: 9, class: _ }
+  - { id: 10, class: _ }
+  - { id: 11, class: _ }
+  - { id: 12, class: _ }
+  - { id: 13, class: _ }
+  - { id: 14, class: _ }
+  - { id: 15, class: _ }
+  - { id: 16, class: _ }
+  - { id: 17, class: _ }
+  - { id: 18, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    %0(s64) = COPY %x0
+
+    ; CHECK: %1(s1) = G_TRUNC %0
+    ; CHECK: %2(s8) = G_TRUNC %0
+    ; CHECK: %3(s16) = G_TRUNC %0
+    ; CHECK: %4(s32) = G_TRUNC %0
+    %1(s1) = G_TRUNC %0
+    %2(s8) = G_TRUNC %0
+    %3(s16) = G_TRUNC %0
+    %4(s32) = G_TRUNC %0
+
+    ; CHECK: %5(s64) = G_ANYEXT %1
+    ; CHECK: %6(s64) = G_ZEXT %2
+    ; CHECK: %7(s64) = G_ANYEXT %3
+    ; CHECK: %8(s64) = G_SEXT %4
+    %5(s64) = G_ANYEXT %1
+    %6(s64) = G_ZEXT %2
+    %7(s64) = G_ANYEXT %3
+    %8(s64) = G_SEXT %4
+
+    ; CHECK: %9(s32) = G_SEXT %1
+    ; CHECK: %10(s32) = G_ZEXT %2
+    ; CHECK: %11(s32) = G_ANYEXT %3
+    %9(s32) = G_SEXT %1
+    %10(s32) = G_ZEXT %2
+    %11(s32) = G_ANYEXT %3
+
+    ; CHECK: %12(s32) = G_ZEXT %1
+    ; CHECK: %13(s32) = G_ANYEXT %2
+    ; CHECK: %14(s32) = G_SEXT %3
+    %12(s32) = G_ZEXT %1
+    %13(s32) = G_ANYEXT %2
+    %14(s32) = G_SEXT %3
+
+    ; CHECK: %15(s8) = G_ZEXT %1
+    ; CHECK: %16(s16) = G_ANYEXT %2
+    %15(s8) = G_ZEXT %1
+    %16(s16) = G_ANYEXT %2
+
+    ; CHECK: %18(s64) = G_FPEXT %17
+    %17(s32) = G_TRUNC  %0
+    %18(s64) = G_FPEXT %17
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
new file mode 100644
index 000000000000..72bd613fab3a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
@@ -0,0 +1,35 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_fcmp() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_fcmp
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x0
+
+    %2(s32) = G_TRUNC %0
+    %3(s32) = G_TRUNC %1
+
+    ; CHECK: %4(s1) = G_FCMP floatpred(oge), %0(s64), %1
+    %4(s1) = G_FCMP floatpred(oge), %0, %1
+
+    ; CHECK: %5(s1) = G_FCMP floatpred(uno), %2(s32), %3
+    %5(s1) = G_FCMP floatpred(uno), %2, %3
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir b/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir
new file mode 100644
index 000000000000..3f11c123ba51
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir
@@ -0,0 +1,31 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_gep_small() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_gep_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_gep_small
+    ; CHECK: [[OFFSET_EXT:%[0-9]+]](s64) = G_SEXT %2(s8)
+    ; CHECK: %3(p0) = G_GEP %0, [[OFFSET_EXT]](s64)
+
+    %0(p0) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %1
+    %3(p0) = G_GEP %0, %2(s8)
+    %x0 = COPY %3
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir
new file mode 100644
index 000000000000..43aa06ba3d90
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir
@@ -0,0 +1,33 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_copy() { ret void }
+  define void @test_targetspecific() { ret void }
+...
+
+---
+name:            test_copy
+registers:
+  - { id: 0, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK-LABEL: name: test_copy
+    ; CHECK: %0(s64) = COPY %x0
+    ; CHECK-NEXT: %x0 = COPY %0
+
+    %0(s64) = COPY %x0
+    %x0 = COPY %0
+...
+
+---
+name:            test_targetspecific
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_targetspecific
+    ; CHECK: RET_ReallyLR
+
+    RET_ReallyLR
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
new file mode 100644
index 000000000000..6a86686fa4bd
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -0,0 +1,95 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_load(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_store(i8* %addr) {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_load
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_load
+    %0(p0) = COPY %x0
+
+    ; CHECK: [[BIT8:%[0-9]+]](s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr)
+    ; CHECK: %1(s1) = G_TRUNC [[BIT8]]
+    %1(s1) = G_LOAD %0 :: (load 1 from %ir.addr)
+
+    ; CHECK: %2(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr)
+    %2(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+
+    ; CHECK: %3(s16) = G_LOAD %0(p0) :: (load 2 from %ir.addr)
+    %3(s16) = G_LOAD %0 :: (load 2 from %ir.addr)
+
+    ; CHECK: %4(s32) = G_LOAD %0(p0) :: (load 4 from %ir.addr)
+    %4(s32) = G_LOAD %0 :: (load 4 from %ir.addr)
+
+    ; CHECK: %5(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr)
+    %5(s64) = G_LOAD %0 :: (load 8 from %ir.addr)
+
+    ; CHECK: %6(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr)
+    %6(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr)
+
+    ; CHECK: %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr)
+    %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr)
+...
+
+---
+name:            test_store
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_store
+
+    %0(p0) = COPY %x0
+    %1(s32) = COPY %w1
+
+    ; CHECK: [[BIT8:%[0-9]+]](s8) = G_ANYEXT %2(s1)
+    ; CHECK: G_STORE [[BIT8]](s8), %0(p0) :: (store 1 into %ir.addr)
+    %2(s1) = G_TRUNC %1
+    G_STORE %2, %0 :: (store 1 into %ir.addr)
+
+    ; CHECK: G_STORE %3(s8), %0(p0) :: (store 1 into %ir.addr)
+    %3(s8) = G_TRUNC %1
+    G_STORE %3, %0 :: (store 1 into %ir.addr)
+
+    ; CHECK: G_STORE %4(s16), %0(p0) :: (store 2 into %ir.addr)
+    %4(s16) = G_TRUNC %1
+    G_STORE %4, %0 :: (store 2 into %ir.addr)
+
+    ; CHECK: G_STORE %1(s32), %0(p0) :: (store 4 into %ir.addr)
+    G_STORE %1, %0 :: (store 4 into %ir.addr)
+
+    ; CHECK: G_STORE %5(s64), %0(p0) :: (store 8 into %ir.addr)
+    %5(s64) = G_PTRTOINT %0(p0)
+    G_STORE %5, %0 :: (store 8 into %ir.addr)
+
+    ; CHECK: G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr)
+    G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr)
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
new file mode 100644
index 000000000000..eb642d4b1a74
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -0,0 +1,34 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_scalar_mul_small() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_scalar_mul_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_mul_small
+    ; CHECK: %4(s8) = G_MUL %2, %3
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0 ; narrow both inputs so the multiply under test operates on s8
+    %3(s8) = G_TRUNC %1
+    %4(s8) = G_MUL %2, %3 ; instruction under test: expected to remain an s8 G_MUL
+    %5(s64) = G_ANYEXT %4 ; extend the product (not the truncated input) so the G_MUL result is live
+    %x0 = COPY %5
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-or.mir b/test/CodeGen/AArch64/GlobalISel/legalize-or.mir
new file mode 100644
index 000000000000..edf10cd411eb
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-or.mir
@@ -0,0 +1,34 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_scalar_or_small() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_scalar_or_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_or_small
+    ; CHECK: %4(s8) = G_OR %2, %3
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0 ; narrow both inputs so the OR under test operates on s8
+    %3(s8) = G_TRUNC %1
+    %4(s8) = G_OR %2, %3 ; instruction under test: expected to remain an s8 G_OR
+    %5(s64) = G_ANYEXT %4 ; extend the OR result (not the truncated input) so the G_OR result is live
+    %x0 = COPY %5
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-property.mir b/test/CodeGen/AArch64/GlobalISel/legalize-property.mir
new file mode 100644
index 000000000000..1381484443e6
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-property.mir
@@ -0,0 +1,17 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @legalized_property() { ret void }
+...
+
+---
+# Check that running the legalizer sets the "legalized" property: the input declares it false and has an empty block, and the output is expected to print it as true.
+# CHECK-LABEL: name: legalized_property
+# CHECK: legalized: true
+name:            legalized_property
+legalized:       false
+body:             |
+  bb.0:
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir
new file mode 100644
index 000000000000..e77f3487609f
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir
@@ -0,0 +1,66 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_rem() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_rem
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+  - { id: 9, class: _ }
+  - { id: 10, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_rem
+    ; CHECK: [[QUOT:%[0-9]+]](s64) = G_UDIV %0, %1
+    ; CHECK: [[PROD:%[0-9]+]](s64) = G_MUL [[QUOT]], %1
+    ; CHECK: [[RES:%[0-9]+]](s64) = G_SUB %0, [[PROD]]
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s64) = G_UREM %0, %1 ; s64 unsigned remainder: expected to expand to x - (x udiv y) * y
+
+    ; CHECK: [[QUOT:%[0-9]+]](s32) = G_SDIV %3, %4
+    ; CHECK: [[PROD:%[0-9]+]](s32) = G_MUL [[QUOT]], %4
+    ; CHECK: [[RES:%[0-9]+]](s32) = G_SUB %3, [[PROD]]
+    %3(s32) = G_TRUNC %0
+    %4(s32) = G_TRUNC %1
+    %5(s32) = G_SREM %3, %4 ; s32 signed remainder: expected to expand to x - (x sdiv y) * y
+
+    ; CHECK: [[LHS32:%[0-9]+]](s32) = G_SEXT %6
+    ; CHECK: [[RHS32:%[0-9]+]](s32) = G_SEXT %7
+    ; CHECK: [[QUOT32:%[0-9]+]](s32) = G_SDIV [[LHS32]], [[RHS32]]
+    ; CHECK: [[QUOT:%[0-9]+]](s8) = G_TRUNC [[QUOT32]]
+    ; CHECK: [[PROD:%[0-9]+]](s8) = G_MUL [[QUOT]], %7
+    ; CHECK: [[RES:%[0-9]+]](s8) = G_SUB %6, [[PROD]]
+    %6(s8) = G_TRUNC %0
+    %7(s8) = G_TRUNC %1
+    %8(s8) = G_SREM %6, %7 ; s8 signed remainder: the divide is expected on sign-extended s32 values, the multiply and subtract on s8
+
+    ; CHECK: %d0 = COPY %0
+    ; CHECK: %d1 = COPY %1
+    ; CHECK: BL $fmod, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0
+    ; CHECK: %9(s64) = COPY %d0
+    %9(s64) = G_FREM %0, %1 ; s64 floating-point remainder: expected to become a call to fmod with operands in d0/d1
+
+    ; CHECK: %s0 = COPY %3
+    ; CHECK: %s1 = COPY %4
+    ; CHECK: BL $fmodf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0
+    ; CHECK: %10(s32) = COPY %s0
+    %10(s32) = G_FREM %3, %4 ; s32 floating-point remainder: expected to become a call to fmodf with operands in s0/s1
+
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir
new file mode 100644
index 000000000000..41a9c33bfad8
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir
@@ -0,0 +1,133 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_simple() {
+  entry:
+    ret void
+  next:
+    ret void
+  }
+...
+
+---
+name:            test_simple
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+  - { id: 9, class: _ }
+  - { id: 10, class: _ }
+  - { id: 11, class: _ }
+  - { id: 12, class: _ }
+  - { id: 13, class: _ }
+  - { id: 14, class: _ }
+  - { id: 15, class: _ }
+  - { id: 16, class: _ }
+  - { id: 17, class: _ }
+  - { id: 18, class: _ }
+  - { id: 19, class: _ }
+  - { id: 20, class: _ }
+  - { id: 21, class: _ }
+  - { id: 22, class: _ }
+  - { id: 23, class: _ }
+  - { id: 24, class: _ }
+  - { id: 25, class: _ }
+  - { id: 26, class: _ }
+  - { id: 27, class: _ }
+  - { id: 28, class: _ }
+  - { id: 29, class: _ }
+  - { id: 30, class: _ }
+  - { id: 31, class: _ }
+  - { id: 32, class: _ }
+  - { id: 33, class: _ }
+  - { id: 34, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    %0(s64) = COPY %x0 ; single 64-bit input that every instruction below derives its operands from
+
+    ; CHECK-LABEL: name: test_simple
+    ; CHECK: %1(p0) = G_INTTOPTR %0
+    ; CHECK: %2(s64) = G_PTRTOINT %1
+    %1(p0) = G_INTTOPTR %0 ; integer/pointer casts: expected to pass through unchanged
+    %2(s64) = G_PTRTOINT %1
+
+    ; CHECK: G_BRCOND %3(s1), %bb.1.next
+    %3(s1) = G_TRUNC %0
+    G_BRCOND %3, %bb.1.next ; conditional branch on an s1 value: expected to pass through unchanged
+
+  bb.1.next:
+    %4(s32) = G_TRUNC %0 ; 32-bit operand for the s32-source conversions below
+
+    ; CHECK: %5(s1) = G_FPTOSI %4
+    ; CHECK: %6(s8) = G_FPTOUI %4
+    ; CHECK: %7(s16) = G_FPTOSI %4
+    ; CHECK: %8(s32) = G_FPTOUI %4
+    ; CHECK: %9(s64) = G_FPTOSI %4
+    %5(s1) = G_FPTOSI %4 ; fp-to-int from an s32 source into s1, s8, s16, s32 and s64 results
+    %6(s8) = G_FPTOUI %4
+    %7(s16) = G_FPTOSI %4
+    %8(s32) = G_FPTOUI %4
+    %9(s64) = G_FPTOSI %4
+
+    ; CHECK: %10(s1) = G_FPTOUI %0
+    ; CHECK: %11(s8) = G_FPTOSI %0
+    ; CHECK: %12(s16) = G_FPTOUI %0
+    ; CHECK: %13(s32) = G_FPTOSI %0
+    ; CHECK: %14(s32) = G_FPTOUI %0
+    %10(s1) = G_FPTOUI %0 ; fp-to-int from an s64 source
+    %11(s8) = G_FPTOSI %0
+    %12(s16) = G_FPTOUI %0
+    %13(s32) = G_FPTOSI %0
+    %14(s32) = G_FPTOUI %0 ; NOTE(review): s32 result again, same as %13, while the s32-source group above ends with s64 - confirm whether s64 was intended
+
+    ; CHECK: %15(s32) = G_UITOFP %5
+    ; CHECK: %16(s32) = G_SITOFP %11
+    ; CHECK: %17(s32) = G_UITOFP %7
+    ; CHECK: %18(s32) = G_SITOFP %4
+    ; CHECK: %19(s32) = G_UITOFP %0
+    %15(s32) = G_UITOFP %5 ; int-to-fp into s32 results from s1, s8, s16, s32 and s64 sources
+    %16(s32) = G_SITOFP %11
+    %17(s32) = G_UITOFP %7
+    %18(s32) = G_SITOFP %4
+    %19(s32) = G_UITOFP %0
+
+    ; CHECK: %20(s64) = G_SITOFP %5
+    ; CHECK: %21(s64) = G_UITOFP %11
+    ; CHECK: %22(s64) = G_SITOFP %7
+    ; CHECK: %23(s64) = G_UITOFP %4
+    ; CHECK: %24(s64) = G_SITOFP %0
+    %20(s64) = G_SITOFP %5 ; int-to-fp into s64 results from the same five source widths
+    %21(s64) = G_UITOFP %11
+    %22(s64) = G_SITOFP %7
+    %23(s64) = G_UITOFP %4
+    %24(s64) = G_SITOFP %0
+
+    ; CHECK: %25(s1) = G_SELECT %10(s1), %10, %5
+    ; CHECK: %26(s8) = G_SELECT %10(s1), %6, %11
+    ; CHECK: %27(s16) = G_SELECT %10(s1), %12, %7
+    ; CHECK: %28(s32) = G_SELECT %10(s1), %15, %16
+    ; CHECK: %29(s64) = G_SELECT %10(s1), %9, %24
+    %25(s1) = G_SELECT %10, %10, %5 ; selects on the s1 condition %10 for s1, s8, s16, s32 and s64 values
+    %26(s8) = G_SELECT %10, %6, %11
+    %27(s16) = G_SELECT %10, %12, %7
+    %28(s32) = G_SELECT %10, %15, %16
+    %29(s64) = G_SELECT %10, %9, %24
+
+    ; CHECK: %30(<2 x s32>) = G_BITCAST %9
+    ; CHECK: %31(s64) = G_BITCAST %30
+    ; CHECK: %32(s32) = G_BITCAST %15
+    %30(<2 x s32>) = G_BITCAST %9 ; scalar to vector and back, then a same-type s32 bitcast
+    %31(s64) = G_BITCAST %30
+    %32(s32) = G_BITCAST %15
+    %33(<4 x s8>) = G_BITCAST %15 ; NOTE(review): no expectation line covers %33 or %34 - confirm that is deliberate
+    %34(<2 x s16>) = G_BITCAST %15
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
new file mode 100644
index 000000000000..e5403cb73c37
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
@@ -0,0 +1,34 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_scalar_sub_small() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_scalar_sub_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_sub_small
+    ; CHECK: [[RES:%.*]](s8) = G_SUB %2, %3
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0 ; narrow both inputs so the subtract under test operates on s8
+    %3(s8) = G_TRUNC %1
+    %4(s8) = G_SUB %2, %3 ; instruction under test: expected to remain an s8 G_SUB
+    %5(s64) = G_ANYEXT %4 ; extend the difference (not the truncated input) so the G_SUB result is live
+    %x0 = COPY %5
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir b/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir
new file mode 100644
index 000000000000..919e674965c0
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir
@@ -0,0 +1,34 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_scalar_xor_small() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_scalar_xor_small
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %x0, %x1, %x2, %x3
+    ; CHECK-LABEL: name: test_scalar_xor_small
+    ; CHECK: %4(s8) = G_XOR %2, %3
+
+    %0(s64) = COPY %x0
+    %1(s64) = COPY %x1
+    %2(s8) = G_TRUNC %0 ; narrow both inputs so the XOR under test operates on s8
+    %3(s8) = G_TRUNC %1
+    %4(s8) = G_XOR %2, %3 ; instruction under test: expected to remain an s8 G_XOR
+    %5(s64) = G_ANYEXT %4 ; extend the XOR result (not the truncated input) so the G_XOR result is live
+    %x0 = COPY %5
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/lit.local.cfg b/test/CodeGen/AArch64/GlobalISel/lit.local.cfg
new file mode 100644
index 000000000000..e99d1bb8446c
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'global-isel' in config.root.available_features:
+    config.unsupported = True
diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
new file mode 100644
index 000000000000..12162eb54a83
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
@@ -0,0 +1,870 @@
+# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect -global-isel %s -o - | FileCheck %s
+
+# Check the default mappings for various instructions.
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; no target triple is declared here; the RUN line passes -mtriple arm64--
+
+  define void @test_add_s32() { ret void }
+  define void @test_add_v4s32() { ret void }
+  define void @test_sub_s32() { ret void }
+  define void @test_sub_v4s32() { ret void }
+  define void @test_mul_s32() { ret void }
+  define void @test_mul_v4s32() { ret void }
+
+  define void @test_and_s32() { ret void }
+  define void @test_and_v4s32() { ret void }
+  define void @test_or_s32() { ret void }
+  define void @test_or_v4s32() { ret void }
+  define void @test_xor_s32() { ret void }
+  define void @test_xor_v4s32() { ret void }
+
+  define void @test_shl_s32() { ret void }
+  define void @test_shl_v4s32() { ret void }
+  define void @test_lshr_s32() { ret void }
+  define void @test_ashr_s32() { ret void }
+
+  define void @test_sdiv_s32() { ret void }
+  define void @test_udiv_s32() { ret void }
+
+  define void @test_anyext_s64_s32() { ret void }
+  define void @test_sext_s64_s32() { ret void }
+  define void @test_zext_s64_s32() { ret void }
+  define void @test_trunc_s32_s64() { ret void }
+
+  define void @test_constant_s32() { ret void }
+  define void @test_constant_p0() { ret void }
+
+  define void @test_icmp_s32() { ret void }
+  define void @test_icmp_p0() { ret void }
+
+  define void @test_frame_index_p0() {
+    %ptr0 = alloca i64 ; the only non-stub body: the matching MIR function declares a stack object named ptr0
+    ret void
+  }
+
+  define void @test_ptrtoint_s64_p0() { ret void }
+  define void @test_inttoptr_p0_s64() { ret void }
+
+  define void @test_load_s32_p0() { ret void }
+  define void @test_store_s32_p0() { ret void }
+
+  define void @test_fadd_s32() { ret void }
+  define void @test_fsub_s32() { ret void }
+  define void @test_fmul_s32() { ret void }
+  define void @test_fdiv_s32() { ret void }
+
+  define void @test_fpext_s64_s32() { ret void }
+  define void @test_fptrunc_s32_s64() { ret void }
+
+  define void @test_fconstant_s32() { ret void }
+
+  define void @test_fcmp_s32() { ret void }
+
+  define void @test_sitofp_s64_s32() { ret void }
+  define void @test_uitofp_s32_s64() { ret void }
+
+  define void @test_fptosi_s64_s32() { ret void }
+  define void @test_fptoui_s32_s64() { ret void }
+...
+
+---
+# CHECK-LABEL: name: test_add_s32
+name:            test_add_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_ADD %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_ADD %0, %0 ; scalar s32 add: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_add_v4s32
+name:            test_add_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_ADD %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_ADD %0, %0 ; <4 x s32> vector add: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_sub_s32
+name:            test_sub_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_SUB %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_SUB %0, %0 ; scalar s32 subtract: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_sub_v4s32
+name:            test_sub_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_SUB %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_SUB %0, %0 ; <4 x s32> vector subtract: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_mul_s32
+name:            test_mul_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_MUL %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_MUL %0, %0 ; scalar s32 multiply: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_mul_v4s32
+name:            test_mul_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_MUL %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_MUL %0, %0 ; <4 x s32> vector multiply: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_and_s32
+name:            test_and_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_AND %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_AND %0, %0 ; scalar s32 bitwise AND: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_and_v4s32
+name:            test_and_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_AND %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_AND %0, %0 ; <4 x s32> vector bitwise AND: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_or_s32
+name:            test_or_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_OR %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_OR %0, %0 ; scalar s32 bitwise OR: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_or_v4s32
+name:            test_or_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_OR %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_OR %0, %0 ; <4 x s32> vector bitwise OR: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_xor_s32
+name:            test_xor_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_XOR %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_XOR %0, %0 ; scalar s32 bitwise XOR: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_xor_v4s32
+name:            test_xor_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_XOR %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_XOR %0, %0 ; <4 x s32> vector bitwise XOR: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_shl_s32
+name:            test_shl_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_SHL %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_SHL %0, %0 ; scalar s32 left shift: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_shl_v4s32
+name:            test_shl_v4s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %q0
+    ; CHECK:      %0(<4 x s32>) = COPY %q0
+    ; CHECK:      %1(<4 x s32>) = G_SHL %0, %0
+    %0(<4 x s32>) = COPY %q0
+    %1(<4 x s32>) = G_SHL %0, %0 ; <4 x s32> vector left shift: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_lshr_s32
+name:            test_lshr_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_LSHR %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_LSHR %0, %0 ; scalar s32 logical right shift: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_ashr_s32
+name:            test_ashr_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_ASHR %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_ASHR %0, %0 ; scalar s32 arithmetic right shift: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_sdiv_s32
+name:            test_sdiv_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_SDIV %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_SDIV %0, %0 ; scalar s32 signed divide: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_udiv_s32
+name:            test_udiv_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s32) = G_UDIV %0, %0
+    %0(s32) = COPY %w0
+    %1(s32) = G_UDIV %0, %0 ; scalar s32 unsigned divide: operand and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_anyext_s64_s32
+name:            test_anyext_s64_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s64) = G_ANYEXT %0
+    %0(s32) = COPY %w0
+    %1(s64) = G_ANYEXT %0 ; any-extend s32 to s64: source and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_sext_s64_s32
+name:            test_sext_s64_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s64) = G_SEXT %0
+    %0(s32) = COPY %w0
+    %1(s64) = G_SEXT %0 ; sign-extend s32 to s64: source and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_zext_s64_s32
+name:            test_zext_s64_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s64) = G_ZEXT %0
+    %0(s32) = COPY %w0
+    %1(s64) = G_ZEXT %0 ; zero-extend s32 to s64: source and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_trunc_s32_s64
+name:            test_trunc_s32_s64
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK:      %0(s64) = COPY %x0
+    ; CHECK:      %1(s32) = G_TRUNC %0
+    %0(s64) = COPY %x0
+    %1(s32) = G_TRUNC %0 ; truncate s64 to s32: source and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_constant_s32
+name:            test_constant_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+registers:
+  - { id: 0, class: _ }
+body: |
+  bb.0:
+    ; CHECK:      %0(s32) = G_CONSTANT 123
+    %0(s32) = G_CONSTANT 123 ; s32 integer constant with no operands: expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_constant_p0
+name:            test_constant_p0
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+registers:
+  - { id: 0, class: _ }
+body: |
+  bb.0:
+    ; CHECK:      %0(p0) = G_CONSTANT 0
+    %0(p0) = G_CONSTANT 0 ; pointer-typed constant: expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_icmp_s32
+name:            test_icmp_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s1) = G_ICMP intpred(ne), %0(s32), %0
+    %0(s32) = COPY %w0
+    %1(s1) = G_ICMP intpred(ne), %0, %0 ; integer compare of s32 values: the operand and the s1 result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_icmp_p0
+name:            test_icmp_p0
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK:      %0(p0) = COPY %x0
+    ; CHECK:      %1(s1) = G_ICMP intpred(ne), %0(p0), %0
+    %0(p0) = COPY %x0
+    %1(s1) = G_ICMP intpred(ne), %0, %0 ; integer compare of pointer values: the operand and the s1 result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_frame_index_p0
+name:            test_frame_index_p0
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+registers:
+  - { id: 0, class: _ }
+stack:
+  - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 }
+body: |
+  bb.0:
+    ; CHECK:      %0(p0) = G_FRAME_INDEX %stack.0.ptr0
+    %0(p0) = G_FRAME_INDEX %stack.0.ptr0 ; address of stack object 0 (ptr0): the pointer result is expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_ptrtoint_s64_p0
+name:            test_ptrtoint_s64_p0
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK:      %0(p0) = COPY %x0
+    ; CHECK:      %1(s64) = G_PTRTOINT %0
+    %0(p0) = COPY %x0
+    %1(s64) = G_PTRTOINT %0 ; pointer to s64 cast: source and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_inttoptr_p0_s64
+name:            test_inttoptr_p0_s64
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK:      %0(s64) = COPY %x0
+    ; CHECK:      %1(p0) = G_INTTOPTR %0
+    %0(s64) = COPY %x0
+    %1(p0) = G_INTTOPTR %0 ; s64 to pointer cast: source and result are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_load_s32_p0
+name:            test_load_s32_p0
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK:      %0(p0) = COPY %x0
+    ; CHECK:      %1(s32) = G_LOAD %0
+    %0(p0) = COPY %x0
+    %1(s32) = G_LOAD %0 ; s32 load through a pointer (written without a memory operand): address and loaded value are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_store_s32_p0
+name:            test_store_s32_p0
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0, %w1
+    ; CHECK:      %0(p0) = COPY %x0
+    ; CHECK:      %1(s32) = COPY %w1
+    ; CHECK:      G_STORE %1(s32), %0(p0)
+    %0(p0) = COPY %x0
+    %1(s32) = COPY %w1
+    G_STORE %1, %0 ; s32 store through a pointer (written without a memory operand): value and address are both expected on the gpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fadd_s32
+name:            test_fadd_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s32) = G_FADD %0, %0
+    %0(s32) = COPY %s0
+    %1(s32) = G_FADD %0, %0 ; s32 floating-point add: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fsub_s32
+name:            test_fsub_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s32) = G_FSUB %0, %0
+    %0(s32) = COPY %s0
+    %1(s32) = G_FSUB %0, %0 ; s32 floating-point subtract: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fmul_s32
+name:            test_fmul_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s32) = G_FMUL %0, %0
+    %0(s32) = COPY %s0
+    %1(s32) = G_FMUL %0, %0 ; s32 floating-point multiply: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fdiv_s32
+name:            test_fdiv_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s32) = G_FDIV %0, %0
+    %0(s32) = COPY %s0
+    %1(s32) = G_FDIV %0, %0 ; s32 floating-point divide: operand and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fpext_s64_s32
+name:            test_fpext_s64_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s64) = G_FPEXT %0
+    %0(s32) = COPY %s0
+    %1(s64) = G_FPEXT %0 ; floating-point extend s32 to s64: source and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fptrunc_s32_s64
+name:            test_fptrunc_s32_s64
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %d0
+    ; CHECK:      %0(s64) = COPY %d0
+    ; CHECK:      %1(s32) = G_FPTRUNC %0
+    %0(s64) = COPY %d0
+    %1(s32) = G_FPTRUNC %0 ; floating-point truncate s64 to s32: source and result are both expected on the fpr bank
+...
+
+---
+# CHECK-LABEL: name: test_fconstant_s32
+name:            test_fconstant_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+registers:
+  - { id: 0, class: _ }
+body: |
+  bb.0:
+    ; CHECK:      %0(s32) = G_FCONSTANT float 1.0
+    %0(s32) = G_FCONSTANT float 1.0 ; s32 floating-point constant: expected on the fpr bank, unlike the integer G_CONSTANT cases
+...
+
+---
+# CHECK-LABEL: name: test_fcmp_s32
+name:            test_fcmp_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s1) = G_FCMP floatpred(olt), %0(s32), %0
+    %0(s32) = COPY %s0
+    %1(s1) = G_FCMP floatpred(olt), %0, %0 ; floating-point compare: the s32 operand is expected on fpr, the s1 result on gpr
+...
+
+---
+# CHECK-LABEL: name: test_sitofp_s64_s32
+name:            test_sitofp_s64_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %w0
+    ; CHECK:      %0(s32) = COPY %w0
+    ; CHECK:      %1(s64) = G_SITOFP %0
+    %0(s32) = COPY %w0
+    %1(s64) = G_SITOFP %0 ; signed int to fp, s32 to s64: the integer source is expected on gpr, the result on fpr
+...
+
+---
+# CHECK-LABEL: name: test_uitofp_s32_s64
+name:            test_uitofp_s32_s64
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 1, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %x0
+    ; CHECK:      %0(s64) = COPY %x0
+    ; CHECK:      %1(s32) = G_UITOFP %0
+    %0(s64) = COPY %x0
+    %1(s32) = G_UITOFP %0 ; unsigned int to fp, s64 to s32: the integer source is expected on gpr, the result on fpr
+...
+
+---
+# CHECK-LABEL: name: test_fptosi_s64_s32
+name:            test_fptosi_s64_s32
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %s0
+    ; CHECK:      %0(s32) = COPY %s0
+    ; CHECK:      %1(s64) = G_FPTOSI %0
+    %0(s32) = COPY %s0
+    %1(s64) = G_FPTOSI %0 ; fp to signed int, s32 to s64: the fp source is expected on fpr, the integer result on gpr
+...
+
+---
+# CHECK-LABEL: name: test_fptoui_s32_s64
+name:            test_fptoui_s32_s64
+legalized:       true
+# CHECK: registers:
+# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 1, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+body: |
+  bb.0:
+    liveins: %d0
+    ; CHECK:      %0(s64) = COPY %d0
+    ; CHECK:      %1(s32) = G_FPTOUI %0
+    %0(s64) = COPY %d0
+    %1(s32) = G_FPTOUI %0 ; fp to unsigned int, s64 to s32: the fp source is expected on fpr, the integer result on gpr
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll
new file mode 100644
index 000000000000..14dbc7c3c31a
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s
+
+%type = type [4 x {i8, i32}]
+
+define %type* @first_offset_const(%type* %addr) {
+; CHECK-LABEL: name: first_offset_const
+; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[OFFSET:%[0-9]+]](s64) = G_CONSTANT i64 32
+; CHECK: [[RES:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET]](s64)
+; CHECK: %x0 = COPY [[RES]](p0)
+; One %type is 32 bytes (4 elements of 8 bytes each), so index 1 folds to a constant 32-byte offset.
+  %res = getelementptr %type, %type* %addr, i32 1
+  ret %type* %res
+}
+
+define %type* @first_offset_trivial(%type* %addr) {
+; CHECK-LABEL: name: first_offset_trivial
+; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[TRIVIAL:%[0-9]+]](p0) = COPY [[BASE]](p0)
+; CHECK: %x0 = COPY [[TRIVIAL]](p0)
+; A zero index adds no offset, so the result is expected to be a plain COPY of the base pointer.
+  %res = getelementptr %type, %type* %addr, i32 0
+  ret %type* %res
+}
+
+define %type* @first_offset_variable(%type* %addr, i64 %idx) {
+; CHECK-LABEL: name: first_offset_variable
+; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[IDX:%[0-9]+]](s64) = COPY %x1
+; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 32
+; CHECK: [[OFFSET:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX]]
+; CHECK: [[STEP0:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET]](s64)
+; CHECK: [[RES:%[0-9]+]](p0) = COPY [[STEP0]](p0)
+; CHECK: %x0 = COPY [[RES]](p0)
+; A variable i64 index is scaled by the 32-byte size of %type with G_MUL before feeding G_GEP.
+  %res = getelementptr %type, %type* %addr, i64 %idx
+  ret %type* %res
+}
+
+define %type* @first_offset_ext(%type* %addr, i32 %idx) {
+; CHECK-LABEL: name: first_offset_ext
+; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[IDX32:%[0-9]+]](s32) = COPY %w1
+; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 32
+; CHECK: [[IDX64:%[0-9]+]](s64) = G_SEXT [[IDX32]](s32)
+; CHECK: [[OFFSET:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX64]]
+; CHECK: [[STEP0:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET]](s64)
+; CHECK: [[RES:%[0-9]+]](p0) = COPY [[STEP0]](p0)
+; CHECK: %x0 = COPY [[RES]](p0)
+; An i32 index is first sign-extended to s64 (G_SEXT), then scaled by the 32-byte size of %type.
+  %res = getelementptr %type, %type* %addr, i32 %idx
+  ret %type* %res
+}
+
+%type1 = type [4 x [4 x i32]]
+define i32* @const_then_var(%type1* %addr, i64 %idx) {
+; CHECK-LABEL: name: const_then_var
+; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[IDX:%[0-9]+]](s64) = COPY %x1
+; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_CONSTANT i64 272
+; CHECK: [[BASE1:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET1]](s64)
+; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 4
+; CHECK: [[OFFSET2:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX]]
+; CHECK: [[BASE2:%[0-9]+]](p0) = G_GEP [[BASE1]], [[OFFSET2]](s64)
+; CHECK: [[RES:%[0-9]+]](p0) = COPY [[BASE2]](p0)
+; CHECK: %x0 = COPY [[RES]](p0)
+; Leading constant indices fold to 4*64 + 1*16 = 272 bytes; the trailing variable index selects an i32, so it is scaled by 4.
+  %res = getelementptr %type1, %type1* %addr, i32 4, i32 1, i64 %idx
+  ret i32* %res
+}
+
+define i32* @var_then_const(%type1* %addr, i64 %idx) {
+; CHECK-LABEL: name: var_then_const
+; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[IDX:%[0-9]+]](s64) = COPY %x1
+; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 64
+; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX]]
+; CHECK: [[BASE1:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET1]](s64)
+; CHECK: [[OFFSET2:%[0-9]+]](s64) = G_CONSTANT i64 40
+; CHECK: [[BASE2:%[0-9]+]](p0) = G_GEP [[BASE1]], [[OFFSET2]](s64)
+; CHECK: %x0 = COPY [[BASE2]](p0)
+; The variable index is scaled by 64 (the size of %type1); the trailing constants fold to 2*16 + 2*4 = 40 bytes.
+  %res = getelementptr %type1, %type1* %addr, i64 %idx, i32 2, i32 2
+  ret i32* %res
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir b/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir
new file mode 100644
index 000000000000..9a2f7f7e54f8
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir
@@ -0,0 +1,22 @@
+# RUN: not llc -verify-machineinstrs -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test() { ret void }
+
+...
+---
+# CHECK: *** Bad machine code: Generic virtual register must have a bank in a RegBankSelected function ***
+# CHECK: instruction: %vreg0<def>(s64) = COPY
+# CHECK: operand 0: %vreg0<def>
+name:            test
+regBankSelected: true
+registers:
+  - { id: 0, class: _ }
+body: |
+  bb.0:
+   liveins: %x0
+   %0(s64) = COPY %x0
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/verify-selected.mir b/test/CodeGen/AArch64/GlobalISel/verify-selected.mir
new file mode 100644
index 000000000000..2149903d08a7
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/verify-selected.mir
@@ -0,0 +1,32 @@
+# RUN: not llc -verify-machineinstrs -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test() { ret void }
+
+...
+
+---
+name:            test
+regBankSelected: true
+selected: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: gpr }
+body: |
+  bb.0:
+   liveins: %x0
+   %0 = COPY %x0
+
+   ; CHECK: *** Bad machine code: Unexpected generic instruction in a Selected function ***
+   ; CHECK: instruction: %vreg1<def> = G_ADD
+   %1 = G_ADD %0, %0
+
+   ; CHECK: *** Bad machine code: Generic virtual register invalid in a Selected function ***
+   ; CHECK: instruction: %vreg2<def>(s64) = COPY
+   ; CHECK: operand 0: %vreg2<def>
+   %2(s64) = COPY %x0
+...
diff --git a/test/CodeGen/AArch64/Redundantstore.ll b/test/CodeGen/AArch64/Redundantstore.ll
index b2072682cd91..b7822a882b4a 100644
--- a/test/CodeGen/AArch64/Redundantstore.ll
+++ b/test/CodeGen/AArch64/Redundantstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s 
+; RUN: llc < %s -O3 -mtriple=aarch64-eabi | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 @end_of_array = common global i8* null, align 8
diff --git a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
index 73200b581585..fb4df34df298 100644
--- a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
+++ b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=arm64
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu
 ; Make sure we are not crashing on this test.
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-unknown-linux-gnu"
 
 declare void @extern(i8*)
 
diff --git a/test/CodeGen/AArch64/aarch64-addv.ll b/test/CodeGen/AArch64/aarch64-addv.ll
index ca374eea28e7..91797c062b88 100644
--- a/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
 
 define i8 @add_B(<16 x i8>* %arr)  {
 ; CHECK-LABEL: add_B
diff --git a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
index 2170e4b902d4..51c32b409db5 100644
--- a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
+++ b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
@@ -4,7 +4,7 @@
 ; test cases have been minimized as much as possible, but still most of the test
 ; cases could break if instruction scheduling heuristics for cortex-a53 change
 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=1 -stats 2>&1 \
-; RUN:  | FileCheck %s --check-prefix CHECK
+; RUN:  | FileCheck %s
 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=0 -stats 2>&1 \
 ; RUN:  | FileCheck %s --check-prefix CHECK-NOWORKAROUND
 ; The following run lines are just to verify whether or not this pass runs by
diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll
index cae00a9b1cb3..6e4a47b04406 100644
--- a/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,8 +1,8 @@
-; RUN: llc -O3 -aarch64-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -aarch64-gep-opt=true -mattr=-use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
-; RUN: llc -O3 -aarch64-gep-opt=true -mattr=+use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
-; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
-; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -mattr=-use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -mattr=+use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-linux-gnueabi"
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
index 845050156baa..347305abb67a 100644
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
@@ -280,3 +280,114 @@ define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
   %3 = extractelement <8 x i32> %1, i32 2
   ret i32 %3
 }
+
+; NEON-LABEL: store_general_mask_factor4:
+; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor4:
+; NONEON-NOT: st4
+define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor4_undefbeg:
+; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor4_undefbeg:
+; NONEON-NOT: st4
+define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor4_undefend:
+; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor4_undefend:
+; NONEON-NOT: st4
+define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
+  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor4_undefmid:
+; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor4_undefmid:
+; NONEON-NOT: st4
+define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
+  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor4_undefmulti:
+; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor4_undefmulti:
+; NONEON-NOT: st4
+define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
+  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor3:
+; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor3:
+; NONEON-NOT: st3
+define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor3_undefmultimid:
+; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
+; NONEON-NOT: st3
+define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor3_undef_fail:
+; NEON-NOT: st3
+; NONEON-LABEL: store_general_mask_factor3_undef_fail:
+; NONEON-NOT: st3
+define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
+  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor3_undeflane:
+; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
+; NONEON-LABEL: store_general_mask_factor3_undeflane:
+; NONEON-NOT: st3
+define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; NEON-LABEL: store_general_mask_factor3_negativestart:
+; NEON-NOT: st3
+; NONEON-LABEL: store_general_mask_factor3_negativestart:
+; NONEON-NOT: st3
+define void @store_general_mask_factor3_negativestart(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
+  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
index 84277995ce5b..1b2ed4b89521 100644
--- a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
+++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -aarch64-gep-opt=true  -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true  -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s
 ; REQUIRES: asserts
 target triple = "aarch64--linux-android"
 
diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll
index fb13b706cfaf..9a56cd6ae7c0 100644
--- a/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1,7 +1,6 @@
-; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux--gnu -aarch64-neon-syntax=generic | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-linu--gnu"
 
 ; CHECK-LABEL: smax_B
 ; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
diff --git a/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/test/CodeGen/AArch64/aarch64-stp-cluster.ll
index 5cab38eafb52..fe5abbf15eff 100644
--- a/test/CodeGen/AArch64/aarch64-stp-cluster.ll
+++ b/test/CodeGen/AArch64/aarch64-stp-cluster.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
 
 ; CHECK: ********** MI Scheduling **********
 ; CHECK-LABEL: stp_i64_scale:BB#0
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index f30ab89f238b..df1b9fe7855f 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -274,19 +274,20 @@ define void @sub_i16rhs() minsize {
 ; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
 ; example), but the remaining instructions are probably not idiomatic
 ; in the face of "add/sub (shifted register)" so I don't intend to.
-define void @addsub_i32rhs() minsize {
+define void @addsub_i32rhs(i32 %in32) minsize {
 ; CHECK-LABEL: addsub_i32rhs:
     %val32_tmp = load i32, i32* @var32
     %lhs64 = load i64, i64* @var64
 
     %val32 = add i32 %val32_tmp, 123
 
-    %rhs64_zext = zext i32 %val32 to i64
+    %rhs64_zext = zext i32 %in32 to i64
     %res64_zext = add i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 
-    %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+    %rhs64_zext2 = zext i32 %val32 to i64
+    %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
     %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
@@ -304,19 +305,20 @@ define void @addsub_i32rhs() minsize {
     ret void
 }
 
-define void @sub_i32rhs() minsize {
+define void @sub_i32rhs(i32 %in32) minsize {
 ; CHECK-LABEL: sub_i32rhs:
     %val32_tmp = load i32, i32* @var32
     %lhs64 = load i64, i64* @var64
 
     %val32 = add i32 %val32_tmp, 123
 
-    %rhs64_zext = zext i32 %val32 to i64
+    %rhs64_zext = zext i32 %in32 to i64
     %res64_zext = sub i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 
-    %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+    %rhs64_zext2 = zext i32 %val32 to i64
+    %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
     %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
@@ -333,3 +335,98 @@ define void @sub_i32rhs() minsize {
 
     ret void
 }
+
+; Check that implicit zext from w reg write is used instead of uxtw form of add.
+define i64 @add_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: add_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: add x0, x1, x[[TMP]]
+  %ret = add i64 %y, %ext
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of sub and that mov WZR is folded to form a neg instruction.
+define i64 @sub_fold_uxtw_xzr(i32 %x)  {
+; CHECK-LABEL: sub_fold_uxtw_xzr:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: neg x0, x[[TMP]]
+  %ret = sub i64 0, %ext
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
+define i1 @cmp_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: cmp_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: cmp x1, x[[TMP]]
+; CHECK-NEXT: cset
+  %ret = icmp eq i64 %y, %ext
+  ret i1 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of add, leading to madd selection.
+define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: madd_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
+  %mul = mul i64 %y, %y
+  %ret = add i64 %mul, %ext
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of sub (subs/cmp), so that the sub and the compare against
+; zero fold into a single cmp.
+define i1 @cmp_sub_fold_uxtw(i32 %x, i64 %y, i64 %z) {
+; CHECK-LABEL: cmp_sub_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
+; CHECK-NEXT: cset
+  %sub = sub i64 %z, %ext
+  %ret = icmp eq i64 %sub, 0
+  ret i1 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of add and add of -1 gets selected as sub.
+define i64 @add_imm_fold_uxtw(i32 %x) {
+; CHECK-LABEL: add_imm_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: sub x0, x[[TMP]], #1
+  %ret = add i64 %ext, -1
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of add and add lsl form gets selected.
+define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: add_lsl_fold_uxtw:
+entry:
+; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
+  %m = or i32 %x, 3
+  %ext = zext i32 %m to i64
+  %shift = shl i64 %y, 3
+; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
+  %ret = add i64 %ext, %shift
+  ret i64 %ret
+}
diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index caafde0a1bb2..bc55c1d9251f 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
@@ -2,43 +2,49 @@
 
 ; rdar://9146594
 
-define void @drt_vsprintf() nounwind ssp {
+source_filename = "test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll"
+
+; Function Attrs: nounwind ssp
+define void @drt_vsprintf() #0 {
 entry:
   %do_tab_convert = alloca i32, align 4
-  br i1 undef, label %if.then24, label %if.else295, !dbg !13
+  br i1 undef, label %if.then24, label %if.else295, !dbg !11
 
 if.then24:                                        ; preds = %entry
   unreachable
 
 if.else295:                                       ; preds = %entry
-  call void @llvm.dbg.declare(metadata i32* %do_tab_convert, metadata !16, metadata !DIExpression()), !dbg !18
-  store i32 0, i32* %do_tab_convert, align 4, !dbg !19
+  call void @llvm.dbg.declare(metadata i32* %do_tab_convert, metadata !14, metadata !16), !dbg !17
+  store i32 0, i32* %do_tab_convert, align 4, !dbg !18
   unreachable
 }
 
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp }
+attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2)
+!1 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
+!2 = !{!3}
+!3 = !DIGlobalVariableExpression(var: !4)
+!4 = !DIGlobalVariable(name: "vsplive", scope: !5, file: !1, line: 617, type: !8, isLocal: true, isDefinition: true)
+!5 = distinct !DISubprogram(name: "drt_vsprintf", scope: !1, file: !1, line: 616, type: !6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{i32 2, !"Dwarf Version", i32 2}
+!11 = !DILocation(line: 653, column: 5, scope: !12)
+!12 = distinct !DILexicalBlock(scope: !13, file: !1, line: 652, column: 35)
+!13 = distinct !DILexicalBlock(scope: !5, file: !1, line: 616, column: 1)
+!14 = !DILocalVariable(name: "do_tab_convert", scope: !15, file: !1, line: 853, type: !8)
+!15 = distinct !DILexicalBlock(scope: !12, file: !1, line: 850, column: 12)
+!16 = !DIExpression()
+!17 = !DILocation(line: 853, column: 11, scope: !15)
+!18 = !DILocation(line: 853, column: 29, scope: !15)
 
-!0 = !DIGlobalVariable(name: "vsplive", line: 617, isLocal: true, isDefinition: true, scope: !1, file: !2, type: !6)
-!1 = distinct !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !4)
-!2 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
-!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: FullDebug, file: !20, enums: !21, retainedTypes: !21, globals: !{!0})
-!4 = !DISubroutineType(types: !5)
-!5 = !{!6}
-!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!7 = distinct !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !8)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null}
-!10 = distinct !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !4)
-!11 = distinct !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !4)
-!12 = distinct !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !8)
-!13 = !DILocation(line: 653, column: 5, scope: !14)
-!14 = distinct !DILexicalBlock(line: 652, column: 35, file: !20, scope: !15)
-!15 = distinct !DILexicalBlock(line: 616, column: 1, file: !20, scope: !1)
-!16 = !DILocalVariable(name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6)
-!17 = distinct !DILexicalBlock(line: 850, column: 12, file: !20, scope: !14)
-!18 = !DILocation(line: 853, column: 11, scope: !17)
-!19 = !DILocation(line: 853, column: 29, scope: !17)
-!20 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
-!21 = !{i32 0}
diff --git a/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
index 491433ce71f7..72213bbcf967 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 define void @foo(i64 %val) {
 ; CHECK: foo
 ;   The stack frame store is not 64-bit aligned. Make sure we use an
diff --git a/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
index 8d0b1b6f84cc..b8855fb5cdb3 100644
--- a/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64
+; RUN: llc < %s -mtriple=arm64-eabi
 
 ; The target lowering for integer comparisons was replacing some DAG nodes
 ; during operation legalization, which resulted in dangling pointers,
diff --git a/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll b/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll
index a4d37e48685f..a50910029257 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define i32 @foo(<4 x i32> %a, i32 %n) nounwind {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
index d59b0d004380..b38b4f2a2b22 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march arm64 -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
 ; <rdar://problem/11294426>
 
 @b = private unnamed_addr constant [3 x i32] [i32 1768775988, i32 1685481784, i32 1836253201], align 4
diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
index b760261f7881..369b94be94c5 100644
--- a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
+++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=arm64 -O0 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=arm64 -O3 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -O0 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -O3 -verify-machineinstrs | FileCheck %s
 
 @.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1
 @.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1
diff --git a/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll
index e2c43d953bb9..9b08538ad6e9 100644
--- a/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll
+++ b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -aarch64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
 
 ;FAST-LABEL: _Z9example25v:
 ;FAST: fcmgt.4s
diff --git a/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll
index 94511243a49f..4d78b3313530 100644
--- a/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll
+++ b/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64
+; RUN: llc < %s -mtriple=arm64-eabi
 ; Make sure we are not crashing on this test.
 
 define void @autogen_SD13158() {
diff --git a/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll
index 404027bfd5f3..9b1dec1ac892 100644
--- a/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll
+++ b/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64
+; RUN: llc < %s -mtriple=arm64-eabi
 
 ; Make sure we are not crashing on this test.
 
diff --git a/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll
index a350ba1472c9..c13b65d34a1a 100644
--- a/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll
+++ b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple
 
 ;CHECK-LABEL: Shuff:
 ;CHECK: tbl.8b
diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
index 6d8c639adb95..649bc25b7265 100644
--- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
 
 define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
 ; CHECK-LABEL: bar:
diff --git a/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
index a73b70718019..226026faf320 100644
--- a/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
+++ b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64
+; RUN: llc < %s -mtriple=arm64-eabi
 
 ; This test case tests an infinite loop bug in DAG combiner.
 ; It just tries to do the following replacing endlessly:
@@ -20,4 +20,4 @@ entry:
   %sext = shl <4 x i32> %mul.i, <i32 16, i32 16, i32 16, i32 16>
   %vmovl.i.i = ashr <4 x i32> %sext, <i32 16, i32 16, i32 16, i32 16>
   ret <4 x i32> %vmovl.i.i
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
index 1bb47fc00b2b..5a1eabc2ee6c 100644
--- a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
+++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s
 
 ; The following 2 test cases test shufflevector with beginning UNDEF mask.
 define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) {
diff --git a/test/CodeGen/AArch64/arm64-abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll
index c92703651385..a29f8c4b57ab 100644
--- a/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-target triple = "arm64-apple-ios7.0.0"
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -mcpu=cyclone -enable-misched=false | FileCheck %s
 
 ; rdar://13625505
 ; Here we have 9 fixed integer arguments the 9th argument in on stack, the
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index e76adb4abc02..b2ea9ad3b4a1 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
-; RUN: llc < %s -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s
-target triple = "arm64-apple-darwin"
+; RUN: llc < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s
 
 ; rdar://12648441
 ; Generated from arm64-arguments.c with -O2.
diff --git a/test/CodeGen/AArch64/arm64-addp.ll b/test/CodeGen/AArch64/arm64-addp.ll
index 3f1e5c5d44e3..982ce0a73a34 100644
--- a/test/CodeGen/AArch64/arm64-addp.ll
+++ b/test/CodeGen/AArch64/arm64-addp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
 
 define double @foo(<2 x double> %a) nounwind {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
index 3197f5bd27ec..6eaf75c4fb96 100644
--- a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-gep-opt=false %s -o - | FileCheck %s
+; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-enable-gep-opt=false %s -o - | FileCheck %s
 ; <rdar://problem/13621857>
 
 @block = common global i8* null, align 8
diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
index d46800d34cac..c57be5684ade 100644
--- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -1,9 +1,8 @@
-; RUN: llc -march arm64 < %s -aarch64-collect-loh=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios3.0.0 -aarch64-enable-collect-loh=false | FileCheck %s
 ; rdar://13452552
 ; Disable the collecting of LOH so that the labels do not get in the
 ; way of the NEXT patterns.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios3.0.0"
 
 @block = common global i8* null, align 8
 
diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll
index 0e651a910d7b..e8fc4e68fcbe 100644
--- a/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-eabi < %s | FileCheck %s
 ; rdar://10232252
 
 @object = external hidden global i64, section "__DATA, __objc_ivar", align 8
diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
index 36424506bee8..a3b740df9b4e 100644
--- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
+++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s
 
 ; CHECK: foo
 ; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
diff --git a/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
index 71e64807f524..f528c9cfabf4 100644
--- a/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
+++ b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
@@ -1,7 +1,6 @@
-; RUN: llc -O1 -march=arm64 -enable-andcmp-sinking=true < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=arm64-apple-ios7.0.0 -enable-andcmp-sinking=true < %s | FileCheck %s
 ; ModuleID = 'and-cbz-extr-mr.bc'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
 
 define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8* %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8, i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8* %str14) unnamed_addr #0 align 2 {
 ; CHECK: _foo:
@@ -14,7 +13,7 @@ entry:
 if.end:                                           ; preds = %entry
   %and.i.i.i = and i32 %int1, 4
   %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0
-  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i
+  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i, !prof !1
 
 land.rhs.i:                                       ; preds = %if.end
   %cmp.i.i.i = icmp eq i8* %str12, %str13
@@ -37,7 +36,7 @@ if.then3:                                         ; preds = %_ZNK7WebCore4Node10
 if.end5:                                          ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i
 ; CHECK: %if.end5
 ; CHECK: tbz
-  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19
+  br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19, !prof !1
 
 land.rhs.i19:                                     ; preds = %if.end5
   %cmp.i.i.i18 = icmp eq i8* %str6, %str7
@@ -70,3 +69,4 @@ return:                                           ; preds = %if.end12, %if.then9
 }
 
 attributes #0 = { nounwind ssp }
+!1 = !{!"branch_weights", i32 3, i32 5}
diff --git a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
index 38661a5f38f3..87826fdbcb8b 100644
--- a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
+++ b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc %s -o - -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 ; Check that ANDS (tst) is not merged with ADD when the immediate
 ; is not 0.
 ; <rdar://problem/16693089>
diff --git a/test/CodeGen/AArch64/arm64-anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll
index 2a2f45196046..1af310383243 100644
--- a/test/CodeGen/AArch64/arm64-anyregcc.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: .section	__LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 2
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -17,20 +17,28 @@
 ; Functions and stack size
 ; CHECK-NEXT:   .quad _test
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _property_access1
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _property_access2
 ; CHECK-NEXT:   .quad 32
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _property_access3
 ; CHECK-NEXT:   .quad 32
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _anyreg_test1
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _anyreg_test2
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _patchpoint_spilldef
 ; CHECK-NEXT:   .quad 112
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _patchpoint_spillargs
 ; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad 1
 
 
 ; test
diff --git a/test/CodeGen/AArch64/arm64-arith-saturating.ll b/test/CodeGen/AArch64/arm64-arith-saturating.ll
index 78cd1fcb1a21..20cf792ce9c3 100644
--- a/test/CodeGen/AArch64/arm64-arith-saturating.ll
+++ b/test/CodeGen/AArch64/arm64-arith-saturating.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
 
 define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: qadds:
diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll
index d5d9a1b98174..bf4990d3c9b5 100644
--- a/test/CodeGen/AArch64/arm64-arith.ll
+++ b/test/CodeGen/AArch64/arm64-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll
index 0904b62c4032..85aa9c44305f 100644
--- a/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll
+++ b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll
@@ -1,7 +1,6 @@
-; RUN: llc -march=arm64 -aarch64-dead-def-elimination=false < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios7.0.0 -aarch64-enable-dead-defs=false < %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
 
 ; Function Attrs: nounwind ssp uwtable
 define i32 @test1() #0 {
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index d7188f31c567..21e3c768ee69 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s
 
 @var = global i128 0
 
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index fef137b1023f..c87103481adf 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
 
 define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
 ; CHECK-LABEL: val_compare_and_swap:
diff --git a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
index 876a69193b47..6f88212cd39d 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -O1 -o - | FileCheck %s
-; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-enable-ldst-opt=false -O1 -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-enable-ldst-opt=false -O0 -fast-isel=true -o - | FileCheck %s
 
 ; CHECK-LABEL: test_i64_f64:
 define void @test_i64_f64(double* %p, i64* %q) {
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
index cc9badc5c552..52d269d37730 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
-; RUN: llc -mtriple aarch64_be < %s -fast-isel=true -aarch64-load-store-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-enable-ldst-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -fast-isel=true -aarch64-enable-ldst-opt=false -o - | FileCheck %s
 
 ; CHECK-LABEL: test_i64_f64:
 define i64 @test_i64_f64(double %p) {
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
index d08976788e91..a1dec896d34a 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
-; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-enable-ldst-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-enable-ldst-opt=false -fast-isel=true -O0 -o - | FileCheck %s
 
 ; Note, we split the functions in to multiple BBs below to isolate the call
 ; instruction we want to test, from fast-isel failing to select instructions
diff --git a/test/CodeGen/AArch64/arm64-big-imm-offsets.ll b/test/CodeGen/AArch64/arm64-big-imm-offsets.ll
index a56df07a49ac..f2b682931600 100644
--- a/test/CodeGen/AArch64/arm64-big-imm-offsets.ll
+++ b/test/CodeGen/AArch64/arm64-big-imm-offsets.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 < %s
+; RUN: llc -mtriple=arm64-eabi < %s
 
 
 ; Make sure large offsets aren't mistaken for valid immediate offsets.
diff --git a/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 402e16ccdb21..339dbbe18fc0 100644
--- a/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -codegenprepare -mtriple=arm64-apple=ios -S -o - %s | FileCheck --check-prefix=OPT %s
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 %struct.X = type { i8, i8, [2 x i8] }
 %struct.Y = type { i32, i8 }
 %struct.Z = type { i8, i8, [2 x i8], i16 }
@@ -530,3 +530,33 @@ define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
 
   ret i16 %conv19
 }
+
+; The following test exercises the case where we have a BFI
+; instruction with the same input in both operands. We need to
+; track the useful bits through both operands.
+; CHECK-LABEL: sameOperandBFI
+; CHECK: lsr
+; CHECK: and
+; CHECK: bfi
+; CHECK: bfi
+define void @sameOperandBFI(i64 %src, i64 %src2, i16 *%ptr) {   ; builds a value by OR-ing %BFISource with itself shifted left by 4, then stores its low 16 bits to %ptr
+entry:
+  %shr47 = lshr i64 %src, 47                ; bits 47 and up of %src moved down to bit 0
+  %src2.trunc = trunc i64 %src2 to i32      ; low 32 bits of %src2
+  br i1 undef, label %end, label %if.else   ; branch condition is undef
+
+if.else:
+  %and3 = and i32 %src2.trunc, 3            ; keep bits 0-1 of %src2
+  %shl2 = shl nuw nsw i64 %shr47, 2         ; place the shifted %src bits starting at bit 2
+  %shl2.trunc = trunc i64 %shl2 to i32
+  %and12 = and i32 %shl2.trunc, 12          ; keep bits 2-3, i.e. original bits 47-48 of %src
+  %BFISource = or i32 %and3, %and12         ; ...00000ABCD
+  %BFIRHS = shl nuw nsw i32 %BFISource, 4   ; ...0ABCD0000
+  %BFI = or i32 %BFIRHS, %BFISource         ; ...0ABCDABCD
+  %BFItrunc = trunc i32 %BFI to i16         ; only the low 16 bits are stored
+  store i16 %BFItrunc, i16* %ptr, align 4
+  br label %end
+
+end:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll
index 1a6c3687dcb0..4bf15ea2393e 100644
--- a/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ; Check that building up a vector w/ only one non-zero lane initializes
 ; intelligently.
diff --git a/test/CodeGen/AArch64/arm64-builtins-linux.ll b/test/CodeGen/AArch64/arm64-builtins-linux.ll
index 6caf3a2a18ef..64239582f230 100644
--- a/test/CodeGen/AArch64/arm64-builtins-linux.ll
+++ b/test/CodeGen/AArch64/arm64-builtins-linux.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 ; Function Attrs: nounwind readnone
 declare i8* @llvm.thread.pointer() #1
diff --git a/test/CodeGen/AArch64/arm64-call-tailcalls.ll b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
index 6621db25da5b..7a91f05b8dd2 100644
--- a/test/CodeGen/AArch64/arm64-call-tailcalls.ll
+++ b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
@@ -89,3 +89,12 @@ declare void @foo() nounwind
 declare i32 @a(i32)
 declare i32 @b(i32)
 declare i32 @c(i32)
+
+; CHECK-LABEL: tswift:
+; CHECK: b _swiftfunc
+define swiftcc i32 @tswift(i32 %a) nounwind {   ; swiftcc function that forwards %a to @swiftfunc and returns its result
+  %res = tail call i32 @swiftfunc(i32 %a)       ; NOTE(review): this call site carries no swiftcc marker although @swiftfunc is declared swiftcc - confirm this is intended
+  ret i32 %res
+}
+
+declare swiftcc i32 @swiftfunc(i32) nounwind
diff --git a/test/CodeGen/AArch64/arm64-cast-opt.ll b/test/CodeGen/AArch64/arm64-cast-opt.ll
index 463add5688e3..2f5d16b25795 100644
--- a/test/CodeGen/AArch64/arm64-cast-opt.ll
+++ b/test/CodeGen/AArch64/arm64-cast-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -march=arm64 -mtriple arm64-apple-ios5.0.0 < %s | FileCheck %s
+; RUN: llc -O3 -mtriple arm64-apple-ios5.0.0 < %s | FileCheck %s
 ; <rdar://problem/15992732>
 ; Zero truncation is not necessary when the values are extended properly
 ; already.
diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
index 25d874e54cb7..fa2343152f72 100644
--- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp | FileCheck %s
+; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp | FileCheck %s
 target triple = "arm64-apple-ios7.0.0"
 
 @channelColumns = external global i64
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index 748bbcca079f..2682fa7dcce1 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp -aarch64-stress-ccmp | FileCheck %s
+; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s
 target triple = "arm64-apple-ios"
 
 ; CHECK: single_same
diff --git a/test/CodeGen/AArch64/arm64-clrsb.ll b/test/CodeGen/AArch64/arm64-clrsb.ll
index 042e52e5e781..02368cb4a4c4 100644
--- a/test/CodeGen/AArch64/arm64-clrsb.ll
+++ b/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s -march=arm64 |  FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 |  FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) #0
diff --git a/test/CodeGen/AArch64/arm64-coalesce-ext.ll b/test/CodeGen/AArch64/arm64-coalesce-ext.ll
index 9420bf3bb593..d5064f6d16e6 100644
--- a/test/CodeGen/AArch64/arm64-coalesce-ext.ll
+++ b/test/CodeGen/AArch64/arm64-coalesce-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s
 ; Check that the peephole optimizer knows about sext and zext instructions.
 ; CHECK: test1sext
 define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
index e34ef39bcfec..4a3696501fd8 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
 ; Check that the LOH analysis does not crash when the analysed chained
 ; contains instructions that are filtered out.
 ;
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
index 8889cb4bf52a..e3df4182ddca 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh-str.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
 ; Test case for <rdar://problem/15942912>.
 ; AdrpAddStr cannot be used when the store uses same
 ; register as address and value. Indeed, the related
diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll
index 3fc0d45f065c..b697b6eced3d 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
-; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF
+; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF
 
 ; CHECK-ELF-NOT: .loh
 ; CHECK-ELF-NOT: AdrpAdrp
diff --git a/test/CodeGen/AArch64/arm64-complex-ret.ll b/test/CodeGen/AArch64/arm64-complex-ret.ll
index 93d50a59861d..250edac553c7 100644
--- a/test/CodeGen/AArch64/arm64-complex-ret.ll
+++ b/test/CodeGen/AArch64/arm64-complex-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64-eabi -o - %s | FileCheck %s
 
 define { i192, i192, i21, i192 } @foo(i192) {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index ed061122f311..b9dbfc7745f8 100644
--- a/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 
 define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
diff --git a/test/CodeGen/AArch64/arm64-crc32.ll b/test/CodeGen/AArch64/arm64-crc32.ll
index d3099e6bb132..22111de5a3aa 100644
--- a/test/CodeGen/AArch64/arm64-crc32.ll
+++ b/test/CodeGen/AArch64/arm64-crc32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mattr=+crc -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64-eabi -mattr=+crc -o - %s | FileCheck %s
 
 define i32 @test_crc32b(i32 %cur, i8 %next) {
 ; CHECK-LABEL: test_crc32b:
diff --git a/test/CodeGen/AArch64/arm64-crypto.ll b/test/CodeGen/AArch64/arm64-crypto.ll
index 2908b336b1bd..615f2a8ecdca 100644
--- a/test/CodeGen/AArch64/arm64-crypto.ll
+++ b/test/CodeGen/AArch64/arm64-crypto.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mattr=crypto -aarch64-neon-syntax=apple -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64-eabi -mattr=crypto -aarch64-neon-syntax=apple -o - %s | FileCheck %s
 
 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
 declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index 8d4bf5dbeb75..030857df7779 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false -verify-machineinstrs | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-enable-atomic-cfg-tidy=0 -aarch64-enable-gep-opt=false -verify-machineinstrs | FileCheck %s
 target triple = "arm64-apple-ios"
 
 ; rdar://12462006
diff --git a/test/CodeGen/AArch64/arm64-csel.ll b/test/CodeGen/AArch64/arm64-csel.ll
index 98eba30f119d..3e246105f057 100644
--- a/test/CodeGen/AArch64/arm64-csel.ll
+++ b/test/CodeGen/AArch64/arm64-csel.ll
@@ -228,3 +228,43 @@ entry:
   %inc.c = add i64 %inc, %c
   ret i64 %inc.c
 }
+
+define i32 @foo20(i32 %x) {   ; returns 6 when %x == 5, otherwise 1 (i32 variant)
+; CHECK-LABEL: foo20:
+; CHECK: cmp w0, #5
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x6
+; CHECK: csinc w0, w[[REG]], wzr, eq
+  %cmp = icmp eq i32 %x, 5
+  %res = select i1 %cmp, i32 6, i32 1   ; true arm is 6, false arm is 1; the patterns above expect a csinc with condition eq
+  ret i32 %res
+}
+
+define i64 @foo21(i64 %x) {   ; returns 6 when %x == 5, otherwise 1 (i64 variant)
+; CHECK-LABEL: foo21:
+; CHECK: cmp x0, #5
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x6
+; CHECK: csinc x0, x[[REG]], xzr, eq
+  %cmp = icmp eq i64 %x, 5
+  %res = select i1 %cmp, i64 6, i64 1   ; true arm is 6, false arm is 1; the patterns above expect a csinc with condition eq
+  ret i64 %res
+}
+
+define i32 @foo22(i32 %x) {   ; returns 1 when %x == 5, otherwise 6 (i32 variant, select arms swapped)
+; CHECK-LABEL: foo22:
+; CHECK: cmp w0, #5
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x6
+; CHECK: csinc w0, w[[REG]], wzr, ne
+  %cmp = icmp eq i32 %x, 5
+  %res = select i1 %cmp, i32 1, i32 6   ; true arm is 1, false arm is 6; the patterns above expect a csinc with condition ne
+  ret i32 %res
+}
+
+define i64 @foo23(i64 %x) {   ; returns 1 when %x == 5, otherwise 6 (i64 variant, select arms swapped)
+; CHECK-LABEL: foo23:
+; CHECK: cmp x0, #5
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x6
+; CHECK: csinc x0, x[[REG]], xzr, ne
+  %cmp = icmp eq i64 %x, 5
+  %res = select i1 %cmp, i64 1, i64 6   ; true arm is 1, false arm is 6; the patterns above expect a csinc with condition ne
+  ret i64 %res
+}
diff --git a/test/CodeGen/AArch64/arm64-csldst-mmo.ll b/test/CodeGen/AArch64/arm64-csldst-mmo.ll
index 0b8f7a19b484..4930c493d62c 100644
--- a/test/CodeGen/AArch64/arm64-csldst-mmo.ll
+++ b/test/CodeGen/AArch64/arm64-csldst-mmo.ll
@@ -13,9 +13,9 @@
 ; CHECK: SU(2):   STRWui %WZR
 ; CHECK: SU(3):   %X21<def>, %X20<def> = LDPXi %SP
 ; CHECK:  Predecessors:
-; CHECK-NEXT:   out SU(0)
-; CHECK-NEXT:   out SU(0)
-; CHECK-NEXT:   ch  SU(0)
+; CHECK-NEXT:   out  SU(0)
+; CHECK-NEXT:   out  SU(0)
+; CHECK-NEXT:   ord  SU(0)
 ; CHECK-NEXT:  Successors:
 define void @test1() {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-cvt.ll b/test/CodeGen/AArch64/arm64-cvt.ll
index 420a8bc04833..e76549677188 100644
--- a/test/CodeGen/AArch64/arm64-cvt.ll
+++ b/test/CodeGen/AArch64/arm64-cvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ;
 ; Floating-point scalar convert to signed integer (to nearest with ties to away)
diff --git a/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll b/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
index 9bb4b7120763..0be3fb12f5ad 100644
--- a/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
+++ b/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
@@ -1,7 +1,6 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
 
 ; Function Attrs: nounwind ssp uwtable
 define i32 @test1() #0 {
diff --git a/test/CodeGen/AArch64/arm64-dup.ll b/test/CodeGen/AArch64/arm64-dup.ll
index c6b7de366d23..28df305f59e1 100644
--- a/test/CodeGen/AArch64/arm64-dup.ll
+++ b/test/CodeGen/AArch64/arm64-dup.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
 ;CHECK-LABEL: v_dup8:
diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 8164f46664b6..388f50c3edb6 100644
--- a/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stress-early-ifcvt -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -stress-early-ifcvt -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 target triple = "arm64-apple-macosx"
 
 ; CHECK: mm2
diff --git a/test/CodeGen/AArch64/arm64-ext.ll b/test/CodeGen/AArch64/arm64-ext.ll
index 8315ffcfb078..584456e70393 100644
--- a/test/CodeGen/AArch64/arm64-ext.ll
+++ b/test/CodeGen/AArch64/arm64-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd:
diff --git a/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
index 048fdb083a41..3ecfdfbf7461 100644
--- a/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
+++ b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <4 x float> @foo(<4 x i16> %a) nounwind {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/AArch64/arm64-extload-knownzero.ll b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
index 642af876423a..5dd8cb282321 100644
--- a/test/CodeGen/AArch64/arm64-extload-knownzero.ll
+++ b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 ; rdar://12771555
 
 define void @foo(i16* %ptr, i32 %a) nounwind {
@@ -12,7 +12,6 @@ bb1:
   %tmp2 = load i16, i16* %ptr, align 2
   br label %bb2
 bb2:
-; CHECK: %bb2
 ; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
 ; CHECK: cmp [[REG]], #23
   %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
diff --git a/test/CodeGen/AArch64/arm64-extract.ll b/test/CodeGen/AArch64/arm64-extract.ll
index 6e07c4ce4ccb..71e0352915a6 100644
--- a/test/CodeGen/AArch64/arm64-extract.ll
+++ b/test/CodeGen/AArch64/arm64-extract.ll
@@ -1,5 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s \
-; RUN: -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
 
 define i64 @ror_i64(i64 %in) {
 ; CHECK-LABEL: ror_i64:
diff --git a/test/CodeGen/AArch64/arm64-extract_subvector.ll b/test/CodeGen/AArch64/arm64-extract_subvector.ll
index 8b15a6453b2b..1a45cc254a7d 100644
--- a/test/CodeGen/AArch64/arm64-extract_subvector.ll
+++ b/test/CodeGen/AArch64/arm64-extract_subvector.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.
 
diff --git a/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
index a9b8024a5c62..48f8bd8e1302 100644
--- a/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
+++ b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define void @caller(i32* nocapture %p, i32 %a, i32 %b) nounwind optsize ssp {
 ; CHECK-NOT: stp
diff --git a/test/CodeGen/AArch64/arm64-fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
index 41027d4b5c74..e8b1557bac66 100644
--- a/test/CodeGen/AArch64/arm64-fcmp-opt.ll
+++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
 ; rdar://10263824
 
 define i1 @fcmp_float1(float %a) nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index e41e19e50eea..34dd15b268d3 100644
--- a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ; DAGCombine to transform a conversion of an extract_vector_elt to an
 ; extract_vector_elt of a conversion, which saves a round trip of copies
diff --git a/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll b/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll
new file mode 100644
index 000000000000..095a0b0edd29
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fma-combine-with-fpfusion.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -fp-contract=fast | FileCheck %s
+define float @mul_add(float %a, float %b, float %c) local_unnamed_addr #0 {
+; CHECK-LABEL: mul_add:
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  entry:
+    %mul = fmul float %a, %b      ; separate multiply ...
+    %add = fadd float %mul, %c    ; ... and add: the test expects both to be emitted as a single fmadd
+    ret float %add
+}
+
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/AArch64/arm64-fma-combines.ll b/test/CodeGen/AArch64/arm64-fma-combines.ll
index ab875c06cc62..95ef0f90d231 100644
--- a/test/CodeGen/AArch64/arm64-fma-combines.ll
+++ b/test/CodeGen/AArch64/arm64-fma-combines.ll
@@ -2,7 +2,7 @@
 define void @foo_2d(double* %src) {
 ; CHECK-LABEL: %entry
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
 entry:
   %arrayidx1 = getelementptr inbounds double, double* %src, i64 5
   %arrayidx2 = getelementptr inbounds double, double* %src, i64 11
diff --git a/test/CodeGen/AArch64/arm64-fmadd.ll b/test/CodeGen/AArch64/arm64-fmadd.ll
index c791900cc2ff..203ce623647f 100644
--- a/test/CodeGen/AArch64/arm64-fmadd.ll
+++ b/test/CodeGen/AArch64/arm64-fmadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-eabi < %s | FileCheck %s
 
 define float @fma32(float %a, float %b, float %c) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-fmax-safe.ll b/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 8b7d66986e78..16e25547fb3c 100644
--- a/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define double @test_direct(float %in) {
 ; CHECK-LABEL: test_direct:
diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll
index 40cc36ea52fa..8337d299ea53 100644
--- a/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/test/CodeGen/AArch64/arm64-fmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -enable-no-nans-fp-math | FileCheck %s
 
 define double @test_direct(float %in) {
 ; CHECK-LABEL: test_direct:
diff --git a/test/CodeGen/AArch64/arm64-fmuladd.ll b/test/CodeGen/AArch64/arm64-fmuladd.ll
index cfc8b5fe65ef..67e245a7bfa9 100644
--- a/test/CodeGen/AArch64/arm64-fmuladd.ll
+++ b/test/CodeGen/AArch64/arm64-fmuladd.ll
@@ -1,4 +1,4 @@
-; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define float @test_f32(float* %A, float* %B, float* %C) nounwind {
 ;CHECK-LABEL: test_f32:
diff --git a/test/CodeGen/AArch64/arm64-fold-lsl.ll b/test/CodeGen/AArch64/arm64-fold-lsl.ll
index e1acd6fdea74..57ef7d736730 100644
--- a/test/CodeGen/AArch64/arm64-fold-lsl.ll
+++ b/test/CodeGen/AArch64/arm64-fold-lsl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 ;
 ; <rdar://problem/14486451>
 
diff --git a/test/CodeGen/AArch64/arm64-fp-contract-zero.ll b/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
index f982cbb7f5e0..70548cad205f 100644
--- a/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
+++ b/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
@@ -11,4 +11,4 @@ define double @test_fms_fold(double %a, double %b) {
   %mul1 = fmul double %b, 0.000000e+00
   %sub = fsub double %mul, %mul1
   ret double %sub
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/arm64-fp.ll b/test/CodeGen/AArch64/arm64-fp.ll
index 08b1b6754c2a..1c88b3d9009a 100644
--- a/test/CodeGen/AArch64/arm64-fp.ll
+++ b/test/CodeGen/AArch64/arm64-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define float @t1(i1 %a, float %b, float %c) nounwind {
 ; CHECK: t1
diff --git a/test/CodeGen/AArch64/arm64-fp128-folding.ll b/test/CodeGen/AArch64/arm64-fp128-folding.ll
index 4024dc984f63..62ac0b62ce98 100644
--- a/test/CodeGen/AArch64/arm64-fp128-folding.ll
+++ b/test/CodeGen/AArch64/arm64-fp128-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
 declare void @bar(i8*, i8*, i32*)
 
 ; SelectionDAG used to try to fold some fp128 operations using the ppc128 type,
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
index bcb196e40456..164351ec71db 100644
--- a/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-enable-atomic-cfg-tidy=0 < %s | FileCheck %s
 
 @lhs = global fp128 zeroinitializer, align 16
 @rhs = global fp128 zeroinitializer, align 16
@@ -156,6 +156,28 @@ define i1 @test_setcc2() {
 ; CHECK: ret
 }
 
+define i1 @test_setcc3() {
+; CHECK-LABEL: test_setcc3:
+; fcmp ueq on fp128 is checked as two libcalls whose results are OR-ed together.
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+  %val = fcmp ueq fp128 %lhs, %rhs
+; CHECK: bl __eqtf2
+; CHECK: cmp     w0, #0
+; CHECK: cset    [[EQ:w[0-9]+]], eq
+; CHECK: bl __unordtf2
+; CHECK: cmp     w0, #0
+; CHECK: cset    [[UNORD:w[0-9]+]], ne
+; CHECK: orr     w0, [[UNORD]], [[EQ]]
+
+  ret i1 %val
+; CHECK: ret
+}
+
+
 define i32 @test_br_cc() {
 ; CHECK-LABEL: test_br_cc:
 
diff --git a/test/CodeGen/AArch64/arm64-frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll
index 321f3354ca21..0544eaebcc5a 100644
--- a/test/CodeGen/AArch64/arm64-frame-index.ll
+++ b/test/CodeGen/AArch64/arm64-frame-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 ; rdar://11935841
 
 define void @t1() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
index 8d74ce7f5182..1e38266b27da 100644
--- a/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
+++ b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define i32 @foo(<4 x i16>* %__a) nounwind {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/AArch64/arm64-icmp-opt.ll b/test/CodeGen/AArch64/arm64-icmp-opt.ll
index 7b12ed748617..12eae0e88fbe 100644
--- a/test/CodeGen/AArch64/arm64-icmp-opt.ll
+++ b/test/CodeGen/AArch64/arm64-icmp-opt.ll
@@ -1,16 +1,17 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 ; Optimize (x > -1) to (x >= 0) etc.
 ; Optimize (cmp (add / sub), 0): eliminate the subs used to update flag
 ;   for comparison only
 ; rdar://10233472
 
-define i32 @t1(i64 %a) nounwind ssp {
-entry:
+define i32 @t1(i64 %a) {
 ; CHECK-LABEL: t1:
-; CHECK-NOT: movn
-; CHECK: cmp  x0, #0
-; CHECK: cset w0, ge
+; CHECK:       // BB#0:
+; CHECK-NEXT:    lsr x8, x0, #63
+; CHECK-NEXT:    eor w0, w8, #0x1
+; CHECK-NEXT:    ret
+;
   %cmp = icmp sgt i64 %a, -1
   %conv = zext i1 %cmp to i32
   ret i32 %conv
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll
index b6ab9934dbc3..7dcd6e25ae1f 100644
--- a/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -1,230 +1,347 @@
-; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-redzone | FileCheck %s
 
-define void @store64(i64** nocapture %out, i64 %index, i64 %spacing) nounwind noinline ssp {
+define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) {
 ; CHECK-LABEL: store64:
 ; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #8
 ; CHECK: ret
-  %tmp = load i64*, i64** %out, align 8
-  %incdec.ptr = getelementptr inbounds i64, i64* %tmp, i64 1
-  store i64 %spacing, i64* %tmp, align 4
-  store i64* %incdec.ptr, i64** %out, align 8
-  ret void
+  %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 1
+  store i64 %spacing, i64* %ptr, align 4
+  ret i64* %incdec.ptr
+}
+
+define i64* @store64idxpos256(i64* %ptr, i64 %index, i64 %spacing) {
+; CHECK-LABEL: store64idxpos256:
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 32 ; 32 x 8 bytes = +256, expected as a separate add (no post-index form checked)
+  store i64 %spacing, i64* %ptr, align 4
+  ret i64* %incdec.ptr
 }
 
-define void @store32(i32** nocapture %out, i32 %index, i32 %spacing) nounwind noinline ssp {
+define i64* @store64idxneg256(i64* %ptr, i64 %index, i64 %spacing) {
+; CHECK-LABEL: store64idxneg256:
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 -32 ; -32 x 8 bytes = -256, expected to fold into a post-indexed store
+  store i64 %spacing, i64* %ptr, align 4
+  ret i64* %incdec.ptr
+}
+
+define i32* @store32(i32* %ptr, i32 %index, i32 %spacing) {
 ; CHECK-LABEL: store32:
 ; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
 ; CHECK: ret
-  %tmp = load i32*, i32** %out, align 8
-  %incdec.ptr = getelementptr inbounds i32, i32* %tmp, i64 1
-  store i32 %spacing, i32* %tmp, align 4
-  store i32* %incdec.ptr, i32** %out, align 8
-  ret void
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  store i32 %spacing, i32* %ptr, align 4
+  ret i32* %incdec.ptr
+}
+
+define i32* @store32idxpos256(i32* %ptr, i32 %index, i32 %spacing) {
+; CHECK-LABEL: store32idxpos256:
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 64 ; 64 x 4 bytes = +256, expected as a separate add (no post-index form checked)
+  store i32 %spacing, i32* %ptr, align 4
+  ret i32* %incdec.ptr
+}
+
+define i32* @store32idxneg256(i32* %ptr, i32 %index, i32 %spacing) {
+; CHECK-LABEL: store32idxneg256:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-256
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 -64 ; -64 x 4 bytes = -256, expected to fold into a post-indexed store
+  store i32 %spacing, i32* %ptr, align 4
+  ret i32* %incdec.ptr
 }
 
-define void @store16(i16** nocapture %out, i16 %index, i16 %spacing) nounwind noinline ssp {
+define i16* @store16(i16* %ptr, i16 %index, i16 %spacing) {
 ; CHECK-LABEL: store16:
 ; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
 ; CHECK: ret
-  %tmp = load i16*, i16** %out, align 8
-  %incdec.ptr = getelementptr inbounds i16, i16* %tmp, i64 1
-  store i16 %spacing, i16* %tmp, align 4
-  store i16* %incdec.ptr, i16** %out, align 8
-  ret void
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 1
+  store i16 %spacing, i16* %ptr, align 4
+  ret i16* %incdec.ptr
 }
 
-define void @store8(i8** nocapture %out, i8 %index, i8 %spacing) nounwind noinline ssp {
+define i16* @store16idxpos256(i16* %ptr, i16 %index, i16 %spacing) {
+; CHECK-LABEL: store16idxpos256:
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
+; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 128 ; 128 x 2 bytes = +256, expected as a separate add (no post-index form checked)
+  store i16 %spacing, i16* %ptr, align 4
+  ret i16* %incdec.ptr
+}
+
+define i16* @store16idxneg256(i16* %ptr, i16 %index, i16 %spacing) {
+; CHECK-LABEL: store16idxneg256:
+; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #-256
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 -128 ; -128 x 2 bytes = -256, expected to fold into a post-indexed store
+  store i16 %spacing, i16* %ptr, align 4
+  ret i16* %incdec.ptr
+}
+
+define i8* @store8(i8* %ptr, i8 %index, i8 %spacing) {
 ; CHECK-LABEL: store8:
 ; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
 ; CHECK: ret
-  %tmp = load i8*, i8** %out, align 8
-  %incdec.ptr = getelementptr inbounds i8, i8* %tmp, i64 1
-  store i8 %spacing, i8* %tmp, align 4
-  store i8* %incdec.ptr, i8** %out, align 8
-  ret void
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 1
+  store i8 %spacing, i8* %ptr, align 4
+  ret i8* %incdec.ptr
 }
 
-define void @truncst64to32(i32** nocapture %out, i32 %index, i64 %spacing) nounwind noinline ssp {
+define i8* @store8idxpos256(i8* %ptr, i8 %index, i8 %spacing) {
+; CHECK-LABEL: store8idxpos256:
+; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 256 ; 256 x 1 byte = +256, expected as a separate add (no post-index form checked)
+  store i8 %spacing, i8* %ptr, align 4
+  ret i8* %incdec.ptr
+}
+
+define i8* @store8idxneg256(i8* %ptr, i8 %index, i8 %spacing) {
+; CHECK-LABEL: store8idxneg256:
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #-256
+; CHECK: ret
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 -256 ; -256 x 1 byte = -256, expected to fold into a post-indexed store
+  store i8 %spacing, i8* %ptr, align 4
+  ret i8* %incdec.ptr
+}
+
+define i32* @truncst64to32(i32* %ptr, i32 %index, i64 %spacing) {
 ; CHECK-LABEL: truncst64to32:
 ; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
 ; CHECK: ret
-  %tmp = load i32*, i32** %out, align 8
-  %incdec.ptr = getelementptr inbounds i32, i32* %tmp, i64 1
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
   %trunc = trunc i64 %spacing to i32
-  store i32 %trunc, i32* %tmp, align 4
-  store i32* %incdec.ptr, i32** %out, align 8
-  ret void
+  store i32 %trunc, i32* %ptr, align 4
+  ret i32* %incdec.ptr
 }
 
-define void @truncst64to16(i16** nocapture %out, i16 %index, i64 %spacing) nounwind noinline ssp {
+define i16* @truncst64to16(i16* %ptr, i16 %index, i64 %spacing) {
 ; CHECK-LABEL: truncst64to16:
 ; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
 ; CHECK: ret
-  %tmp = load i16*, i16** %out, align 8
-  %incdec.ptr = getelementptr inbounds i16, i16* %tmp, i64 1
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 1
   %trunc = trunc i64 %spacing to i16
-  store i16 %trunc, i16* %tmp, align 4
-  store i16* %incdec.ptr, i16** %out, align 8
-  ret void
+  store i16 %trunc, i16* %ptr, align 4
+  ret i16* %incdec.ptr
 }
 
-define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind noinline ssp {
+define i8* @truncst64to8(i8* %ptr, i8 %index, i64 %spacing) {
 ; CHECK-LABEL: truncst64to8:
 ; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
 ; CHECK: ret
-  %tmp = load i8*, i8** %out, align 8
-  %incdec.ptr = getelementptr inbounds i8, i8* %tmp, i64 1
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 1
   %trunc = trunc i64 %spacing to i8
-  store i8 %trunc, i8* %tmp, align 4
-  store i8* %incdec.ptr, i8** %out, align 8
-  ret void
+  store i8 %trunc, i8* %ptr, align 4
+  ret i8* %incdec.ptr
 }
 
 
-define void @storef16(half** %out, half %index, half %spacing) nounwind {
+define half* @storef16(half* %ptr, half %index, half %spacing) nounwind {
 ; CHECK-LABEL: storef16:
 ; CHECK: str h{{[0-9+]}}, [x{{[0-9+]}}], #2
 ; CHECK: ret
-  %tmp = load half*, half** %out, align 2
-  %incdec.ptr = getelementptr inbounds half, half* %tmp, i64 1
-  store half %spacing, half* %tmp, align 2
-  store half* %incdec.ptr, half** %out, align 2
-  ret void
+  %incdec.ptr = getelementptr inbounds half, half* %ptr, i64 1
+  store half %spacing, half* %ptr, align 2
+  ret half* %incdec.ptr
 }
 
-define void @storef32(float** nocapture %out, float %index, float %spacing) nounwind noinline ssp {
+define float* @storef32(float* %ptr, float %index, float %spacing) {
 ; CHECK-LABEL: storef32:
 ; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4
 ; CHECK: ret
-  %tmp = load float*, float** %out, align 8
-  %incdec.ptr = getelementptr inbounds float, float* %tmp, i64 1
-  store float %spacing, float* %tmp, align 4
-  store float* %incdec.ptr, float** %out, align 8
-  ret void
+  %incdec.ptr = getelementptr inbounds float, float* %ptr, i64 1
+  store float %spacing, float* %ptr, align 4
+  ret float* %incdec.ptr
 }
 
-define void @storef64(double** nocapture %out, double %index, double %spacing) nounwind noinline ssp {
+define double* @storef64(double* %ptr, double %index, double %spacing) {
 ; CHECK-LABEL: storef64:
 ; CHECK: str d{{[0-9+]}}, [x{{[0-9+]}}], #8
 ; CHECK: ret
-  %tmp = load double*, double** %out, align 8
-  %incdec.ptr = getelementptr inbounds double, double* %tmp, i64 1
-  store double %spacing, double* %tmp, align 4
-  store double* %incdec.ptr, double** %out, align 8
-  ret void
+  %incdec.ptr = getelementptr inbounds double, double* %ptr, i64 1
+  store double %spacing, double* %ptr, align 4
+  ret double* %incdec.ptr
 }
 
-define double * @pref64(double** nocapture %out, double %spacing) nounwind noinline ssp {
+
+define double* @pref64(double* %ptr, double %spacing) {
 ; CHECK-LABEL: pref64:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: str     d0, [x0, #32]!
+; CHECK:      str d0, [x0, #32]!
 ; CHECK-NEXT: ret
-  %tmp = load double*, double** %out, align 8
-  %ptr = getelementptr inbounds double, double* %tmp, i64 4
-  store double %spacing, double* %ptr, align 4
-  ret double *%ptr
+  %incdec.ptr = getelementptr inbounds double, double* %ptr, i64 4
+  store double %spacing, double* %incdec.ptr, align 4
+  ret double *%incdec.ptr
 }
 
-define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline ssp {
+define float* @pref32(float* %ptr, float %spacing) {
 ; CHECK-LABEL: pref32:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: str     s0, [x0, #12]!
+; CHECK:      str s0, [x0, #12]!
 ; CHECK-NEXT: ret
-  %tmp = load float*, float** %out, align 8
-  %ptr = getelementptr inbounds float, float* %tmp, i64 3
-  store float %spacing, float* %ptr, align 4
-  ret float *%ptr
+  %incdec.ptr = getelementptr inbounds float, float* %ptr, i64 3
+  store float %spacing, float* %incdec.ptr, align 4
+  ret float *%incdec.ptr
 }
 
-define half* @pref16(half** %out, half %spacing) nounwind {
+define half* @pref16(half* %ptr, half %spacing) nounwind {
 ; CHECK-LABEL: pref16:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: str h0, [x0, #6]!
+; CHECK:      str h0, [x0, #6]!
 ; CHECK-NEXT: ret
-  %tmp = load half*, half** %out, align 2
-  %ptr = getelementptr inbounds half, half* %tmp, i64 3
-  store half %spacing, half* %ptr, align 2
-  ret half *%ptr
+  %incdec.ptr = getelementptr inbounds half, half* %ptr, i64 3
+  store half %spacing, half* %incdec.ptr, align 2
+  ret half *%incdec.ptr
 }
 
-define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp {
+define i64* @pre64(i64* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pre64:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: str     x1, [x0, #16]!
+; CHECK:      str x1, [x0, #16]!
 ; CHECK-NEXT: ret
-  %tmp = load i64*, i64** %out, align 8
-  %ptr = getelementptr inbounds i64, i64* %tmp, i64 2
-  store i64 %spacing, i64* %ptr, align 4
-  ret i64 *%ptr
+  %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 2
+  store i64 %spacing, i64* %incdec.ptr, align 4
+  ret i64 *%incdec.ptr
 }
 
-define i32 * @pre32(i32** nocapture %out, i32 %spacing) nounwind noinline ssp {
+define i64* @pre64idxpos256(i64* %ptr, i64 %spacing) {
+; CHECK-LABEL: pre64idxpos256:
+; CHECK:      add x8, x0, #256
+; CHECK-NEXT: str x1, [x0, #256]
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 32
+  store i64 %spacing, i64* %incdec.ptr, align 4
+  ret i64 *%incdec.ptr
+}
+
+define i64* @pre64idxneg256(i64* %ptr, i64 %spacing) {
+; CHECK-LABEL: pre64idxneg256:
+; CHECK:      str x1, [x0, #-256]!
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 -32
+  store i64 %spacing, i64* %incdec.ptr, align 4
+  ret i64 *%incdec.ptr
+}
+
+define i32* @pre32(i32* %ptr, i32 %spacing) {
 ; CHECK-LABEL: pre32:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: str     w1, [x0, #8]!
+; CHECK:      str w1, [x0, #8]!
 ; CHECK-NEXT: ret
-  %tmp = load i32*, i32** %out, align 8
-  %ptr = getelementptr inbounds i32, i32* %tmp, i64 2
-  store i32 %spacing, i32* %ptr, align 4
-  ret i32 *%ptr
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 2
+  store i32 %spacing, i32* %incdec.ptr, align 4
+  ret i32 *%incdec.ptr
+}
+
+define i32* @pre32idxpos256(i32* %ptr, i32 %spacing) {
+; CHECK-LABEL: pre32idxpos256:
+; CHECK:      add x8, x0, #256
+; CHECK-NEXT: str w1, [x0, #256]
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 64
+  store i32 %spacing, i32* %incdec.ptr, align 4
+  ret i32 *%incdec.ptr
+}
+
+define i32* @pre32idxneg256(i32* %ptr, i32 %spacing) {
+; CHECK-LABEL: pre32idxneg256:
+; CHECK:      str w1, [x0, #-256]!
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 -64
+  store i32 %spacing, i32* %incdec.ptr, align 4
+  ret i32 *%incdec.ptr
 }
 
-define i16 * @pre16(i16** nocapture %out, i16 %spacing) nounwind noinline ssp {
+define i16* @pre16(i16* %ptr, i16 %spacing) {
 ; CHECK-LABEL: pre16:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: strh    w1, [x0, #4]!
+; CHECK:      strh w1, [x0, #4]!
 ; CHECK-NEXT: ret
-  %tmp = load i16*, i16** %out, align 8
-  %ptr = getelementptr inbounds i16, i16* %tmp, i64 2
-  store i16 %spacing, i16* %ptr, align 4
-  ret i16 *%ptr
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 2
+  store i16 %spacing, i16* %incdec.ptr, align 4
+  ret i16 *%incdec.ptr
+}
+
+define i16* @pre16idxpos256(i16* %ptr, i16 %spacing) {
+; CHECK-LABEL: pre16idxpos256:
+; CHECK:      add x8, x0, #256
+; CHECK-NEXT: strh w1, [x0, #256]
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 128
+  store i16 %spacing, i16* %incdec.ptr, align 4
+  ret i16 *%incdec.ptr
 }
 
-define i8 * @pre8(i8** nocapture %out, i8 %spacing) nounwind noinline ssp {
+define i16* @pre16idxneg256(i16* %ptr, i16 %spacing) {
+; CHECK-LABEL: pre16idxneg256:
+; CHECK:      strh w1, [x0, #-256]!
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 -128
+  store i16 %spacing, i16* %incdec.ptr, align 4
+  ret i16 *%incdec.ptr
+}
+
+define i8* @pre8(i8* %ptr, i8 %spacing) {
 ; CHECK-LABEL: pre8:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: strb    w1, [x0, #2]!
+; CHECK:      strb w1, [x0, #2]!
 ; CHECK-NEXT: ret
-  %tmp = load i8*, i8** %out, align 8
-  %ptr = getelementptr inbounds i8, i8* %tmp, i64 2
-  store i8 %spacing, i8* %ptr, align 4
-  ret i8 *%ptr
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 2
+  store i8 %spacing, i8* %incdec.ptr, align 4
+  ret i8 *%incdec.ptr
 }
 
-define i32 * @pretrunc64to32(i32** nocapture %out, i64 %spacing) nounwind noinline ssp {
+define i8* @pre8idxpos256(i8* %ptr, i8 %spacing) {
+; CHECK-LABEL: pre8idxpos256:
+; CHECK:      add x8, x0, #256
+; CHECK-NEXT: strb w1, [x0, #256]
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 256
+  store i8 %spacing, i8* %incdec.ptr, align 4
+  ret i8 *%incdec.ptr
+}
+
+define i8* @pre8idxneg256(i8* %ptr, i8 %spacing) {
+; CHECK-LABEL: pre8idxneg256:
+; CHECK:      strb w1, [x0, #-256]!
+; CHECK-NEXT: ret
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 -256
+  store i8 %spacing, i8* %incdec.ptr, align 4
+  ret i8 *%incdec.ptr
+}
+
+define i32* @pretrunc64to32(i32* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pretrunc64to32:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: str     w1, [x0, #8]!
+; CHECK:      str w1, [x0, #8]!
 ; CHECK-NEXT: ret
-  %tmp = load i32*, i32** %out, align 8
-  %ptr = getelementptr inbounds i32, i32* %tmp, i64 2
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 2
   %trunc = trunc i64 %spacing to i32
-  store i32 %trunc, i32* %ptr, align 4
-  ret i32 *%ptr
+  store i32 %trunc, i32* %incdec.ptr, align 4
+  ret i32 *%incdec.ptr
 }
 
-define i16 * @pretrunc64to16(i16** nocapture %out, i64 %spacing) nounwind noinline ssp {
+define i16* @pretrunc64to16(i16* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pretrunc64to16:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: strh    w1, [x0, #4]!
+; CHECK:      strh w1, [x0, #4]!
 ; CHECK-NEXT: ret
-  %tmp = load i16*, i16** %out, align 8
-  %ptr = getelementptr inbounds i16, i16* %tmp, i64 2
+  %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 2
   %trunc = trunc i64 %spacing to i16
-  store i16 %trunc, i16* %ptr, align 4
-  ret i16 *%ptr
+  store i16 %trunc, i16* %incdec.ptr, align 4
+  ret i16 *%incdec.ptr
 }
 
-define i8 * @pretrunc64to8(i8** nocapture %out, i64 %spacing) nounwind noinline ssp {
+define i8* @pretrunc64to8(i8* %ptr, i64 %spacing) {
 ; CHECK-LABEL: pretrunc64to8:
-; CHECK: ldr     x0, [x0]
-; CHECK-NEXT: strb    w1, [x0, #2]!
+; CHECK:      strb w1, [x0, #2]!
 ; CHECK-NEXT: ret
-  %tmp = load i8*, i8** %out, align 8
-  %ptr = getelementptr inbounds i8, i8* %tmp, i64 2
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 2
   %trunc = trunc i64 %spacing to i8
-  store i8 %trunc, i8* %ptr, align 4
-  ret i8 *%ptr
+  store i8 %trunc, i8* %incdec.ptr, align 4
+  ret i8 *%incdec.ptr
 }
 
 ;-----
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 98d4e3646f56..071b2d0dbca4 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -6174,11 +6174,10 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i6
 }
 
 ; Check for dependencies between the vector and the scalar load.
-define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2) {
+define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) {
 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
 ; CHECK: BB#0:
 ; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
-; CHECK-NEXT: movi.2d v0, #0000000000000000
 ; CHECK-NEXT: str q0, [x3]
 ; CHECK-NEXT: ldr q0, [x4]
 ; CHECK-NEXT: ins.s v0[1], v[[LD]][0]
@@ -6186,7 +6185,7 @@ define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, flo
 ; CHECK-NEXT: str [[POST]], [x1]
 ; CHECK-NEXT: ret
   %tmp1 = load float, float* %bar
-  store <4 x float> zeroinitializer, <4 x float>* %dep_ptr_1, align 16
+  store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16
   %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 %inc
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
index a7aaf9e55d1b..7dc9f7260037 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march=arm64 < %s  2> %t
+; RUN: not llc -mtriple=arm64-eabi < %s  2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ; Check for at least one invalid constant.
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
index 077e1b80d93f..592875b0cb0c 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march=arm64 < %s  2> %t
+; RUN: not llc -mtriple=arm64-eabi < %s  2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ; Check for at least one invalid constant.
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
index 2a7f9619de55..893e8d29e65d 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march=arm64 < %s  2> %t
+; RUN: not llc -mtriple=arm64-eabi < %s  2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ; Check for at least one invalid constant.
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
index 170194341951..b2fb822aa299 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march=arm64 < %s  2> %t
+; RUN: not llc -mtriple=arm64-eabi < %s  2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ; Check for at least one invalid constant.
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
index 952bf6042c2d..aaee933fd6dc 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march=arm64 < %s  2> %t
+; RUN: not llc -mtriple=arm64-eabi < %s  2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ; Check for at least one invalid constant.
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
index b4a199f160ac..d1d2e03548e2 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march=arm64 < %s  2> %t
+; RUN: not llc -mtriple=arm64-eabi < %s  2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ; Check for at least one invalid constant.
diff --git a/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll b/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
index 6bfce8f8f6a4..0641bf148719 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
@@ -1,4 +1,4 @@
-; RUN: not llc < %s -march=arm64 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=arm64-eabi 2>&1 | FileCheck %s
 
 
 ; The 'z' constraint allocates either xzr or wzr, but obviously an input of 1 is
diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
index 4d4adb10d556..f3f359380440 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -246,3 +246,11 @@ define <4 x float> @test_vreg_128bit(<4 x float> %in) nounwind {
   ; CHECK fadd v14.4s, v0.4s, v0.4s:
   ret <4 x float> %1
 }
+
+define void @test_constraint_w(i32 %a) {
+  ; CHECK: fmov [[SREG:s[0-9]+]], {{w[0-9]+}}
+  ; CHECK: sqxtn h0, [[SREG]]
+
+  tail call void asm sideeffect "sqxtn h0, ${0:s}\0A", "w"(i32 %a)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll
index 4635cfe5858d..c7f213fa8464 100644
--- a/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/test/CodeGen/AArch64/arm64-jumptable.ll
@@ -2,25 +2,26 @@
 ; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX
 ; <rdar://11417675>
 
-define void @sum(i32* %to) {
+define void @sum(i32 %a, i32* %to, i32 %c) {
 entry:
-  switch i32 undef, label %exit [
+  switch i32 %a, label %exit [
     i32 1, label %bb1
     i32 2, label %bb2
     i32 3, label %bb3
     i32 4, label %bb4
   ]
 bb1:
-  store i32 undef, i32* %to
+  %b = add i32 %c, 1
+  store i32 %b, i32* %to
   br label %exit
 bb2:
-  store i32 undef, i32* %to
+  store i32 2, i32* %to
   br label %exit
 bb3:
-  store i32 undef, i32* %to
+  store i32 3, i32* %to
   br label %exit
 bb4:
-  store i32 undef, i32* %to
+  store i32 4, i32* %to
   br label %exit
 exit:
   ret void
diff --git a/test/CodeGen/AArch64/arm64-ld1.ll b/test/CodeGen/AArch64/arm64-ld1.ll
index a83a2703addc..5f1caa2d67f8 100644
--- a/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
 %struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
diff --git a/test/CodeGen/AArch64/arm64-ldp-aa.ll b/test/CodeGen/AArch64/arm64-ldp-aa.ll
index ad5c01cfe34e..acc70988e360 100644
--- a/test/CodeGen/AArch64/arm64-ldp-aa.ll
+++ b/test/CodeGen/AArch64/arm64-ldp-aa.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -enable-misched=false -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -enable-misched=false -verify-machineinstrs | FileCheck %s
 
 ; The next set of tests makes sure we can combine the second instruction into
 ; the first.
diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll
index 6071d092f8b3..998ff9e895fb 100644
--- a/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/test/CodeGen/AArch64/arm64-ldp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: ldp_int
 ; CHECK: ldp
diff --git a/test/CodeGen/AArch64/arm64-ldur.ll b/test/CodeGen/AArch64/arm64-ldur.ll
index c4bf397d5d03..cfd9bfeb599a 100644
--- a/test/CodeGen/AArch64/arm64-ldur.ll
+++ b/test/CodeGen/AArch64/arm64-ldur.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define i64 @_f0(i64* %p) {
 ; CHECK: f0:
diff --git a/test/CodeGen/AArch64/arm64-leaf.ll b/test/CodeGen/AArch64/arm64-leaf.ll
index d3b2031686e8..2bdf0290013d 100644
--- a/test/CodeGen/AArch64/arm64-leaf.ll
+++ b/test/CodeGen/AArch64/arm64-leaf.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
 ; rdar://12829704
 
 define void @t8() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll
index ad89d3ff711b..cc4defefa328 100644
--- a/test/CodeGen/AArch64/arm64-long-shift.ll
+++ b/test/CodeGen/AArch64/arm64-long-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
 
 define i128 @shl(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: shl:
diff --git a/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
index 23e90100fb94..0590031fbcdc 100644
--- a/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 
diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll
index 56959ade0439..8f22f97ca087 100644
--- a/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define void @t1(i8* nocapture %c) nounwind optsize {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index 8b270abef59a..41287a17da86 100644
--- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - -misched-limit=2 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -disable-machine-dce -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -disable-machine-dce -o - -misched-limit=2 2>&1 > /dev/null | FileCheck %s
 ;
 ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
 ; much higher than the ADD instructions in order to hide latency. When not
@@ -182,22 +182,22 @@ declare void @llvm.trap()
 ; CHECK: LD4Fourv2d
 ; CHECK: STRQui
 ; CHECK: ********** INTERVALS **********
-define void @testLdStConflict() {
+define void @testLdStConflict(<2 x i64> %v) {
 entry:
   br label %loop
 
 loop:
   %0 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8* null)
   %ptr = bitcast i8* undef to <2 x i64>*
-  store <2 x i64> zeroinitializer, <2 x i64>* %ptr, align 4
+  store <2 x i64> %v, <2 x i64>* %ptr, align 4
   %ptr1 = bitcast i8* undef to <2 x i64>*
-  store <2 x i64> zeroinitializer, <2 x i64>* %ptr1, align 4
+  store <2 x i64> %v, <2 x i64>* %ptr1, align 4
   %ptr2 = bitcast i8* undef to <2 x i64>*
-  store <2 x i64> zeroinitializer, <2 x i64>* %ptr2, align 4
+  store <2 x i64> %v, <2 x i64>* %ptr2, align 4
   %ptr3 = bitcast i8* undef to <2 x i64>*
-  store <2 x i64> zeroinitializer, <2 x i64>* %ptr3, align 4
+  store <2 x i64> %v, <2 x i64>* %ptr3, align 4
   %ptr4 = bitcast i8* undef to <2 x i64>*
-  store <2 x i64> zeroinitializer, <2 x i64>* %ptr4, align 4
+  store <2 x i64> %v, <2 x i64>* %ptr4, align 4
   br label %loop
 }
 
diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
index 07373ccedc5b..0ee74d1f782e 100644
--- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
@@ -8,8 +8,8 @@
 ; CHECK: shiftable
 ; CHECK: SU(2):   %vreg2<def> = SUBXri %vreg1, 20, 0
 ; CHECK:   Successors:
-; CHECK-NEXT:    val SU(4): Latency=1 Reg=%vreg2
-; CHECK-NEXT:    val SU(3): Latency=2 Reg=%vreg2
+; CHECK-NEXT:    data SU(4): Latency=1 Reg=%vreg2
+; CHECK-NEXT:    data SU(3): Latency=2 Reg=%vreg2
 ; CHECK: ********** INTERVALS **********
 define i64 @shiftable(i64 %A, i64 %B) {
         %tmp0 = sub i64 %B, 20
diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
index 292fbb744cea..0ec754f97ec7 100644
--- a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
+++ b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
@@ -7,11 +7,11 @@
 ; CHECK: misched_bug:BB#0 entry
 ; CHECK: SU(2):   %vreg2<def> = LDRWui %vreg0, 1; mem:LD4[%ptr1_plus1] GPR32:%vreg2 GPR64common:%vreg0
 ; CHECK:   Successors:
-; CHECK-NEXT:    val SU(5): Latency=4 Reg=%vreg2
-; CHECK-NEXT:    ch  SU(4): Latency=0
+; CHECK-NEXT:    data SU(5): Latency=4 Reg=%vreg2
+; CHECK-NEXT:    ord  SU(4): Latency=0
 ; CHECK: SU(3):   STRWui %WZR, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0
 ; CHECK:   Successors:
-; CHECK: ch  SU(4): Latency=0
+; CHECK: ord  SU(4): Latency=0
 ; CHECK: SU(4):   STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1
 ; CHECK: SU(5):   %W0<def> = COPY %vreg2; GPR32:%vreg2
 ; CHECK: ** ScheduleDAGMI::schedule picking next node
diff --git a/test/CodeGen/AArch64/arm64-misched-multimmo.ll b/test/CodeGen/AArch64/arm64-misched-multimmo.ll
index d4e8aa1a0a06..3593668e0156 100644
--- a/test/CodeGen/AArch64/arm64-misched-multimmo.ll
+++ b/test/CodeGen/AArch64/arm64-misched-multimmo.ll
@@ -7,7 +7,7 @@
 
 ; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling.
 ;
-; CHECK-LABEL: # Machine code for function foo: Properties: <Post SSA
+; CHECK-LABEL: # Machine code for function foo:
 ; CHECK: SU(2):   %W{{[0-9]+}}<def>, %W{{[0-9]+}}<def> = LDPWi
 ; CHECK: Successors:
 ; CHECK-NOT: ch SU(4)
diff --git a/test/CodeGen/AArch64/arm64-movi.ll b/test/CodeGen/AArch64/arm64-movi.ll
index 344e2224ab43..c24490665d62 100644
--- a/test/CodeGen/AArch64/arm64-movi.ll
+++ b/test/CodeGen/AArch64/arm64-movi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 ;==--------------------------------------------------------------------------==
 ; Tests for MOV-immediate implemented with ORR-immediate.
diff --git a/test/CodeGen/AArch64/arm64-mul.ll b/test/CodeGen/AArch64/arm64-mul.ll
index a424dc761bc8..d01b05210187 100644
--- a/test/CodeGen/AArch64/arm64-mul.ll
+++ b/test/CodeGen/AArch64/arm64-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 ; rdar://9296808
 ; rdar://9349137
diff --git a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
deleted file mode 100644
index be5b7e9b2966..000000000000
--- a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
+++ /dev/null
@@ -1,496 +0,0 @@
-; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=LE
-; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=BE
-; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=kryo -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=LE
-
-; CHECK-LABEL: Ldrh_merge
-; CHECK-NOT: ldrh
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i16 @Ldrh_merge(i16* nocapture readonly %p) {
-  %1 = load i16, i16* %p, align 2
-  %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
-  %2 = load i16, i16* %arrayidx2, align 2
-  %add = sub nuw nsw i16 %1, %2
-  ret i16 %add
-}
-
-; CHECK-LABEL: Ldurh_merge
-; CHECK-NOT: ldurh
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; CHECK-DAG: lsr  [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i16 @Ldurh_merge(i16* nocapture readonly %p)  {
-entry:
-  %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
-  %0 = load i16, i16* %arrayidx
-  %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
-  %1 = load i16, i16* %arrayidx3
-  %add = sub nuw nsw i16 %0, %1
-  ret i16 %add
-}
-
-; CHECK-LABEL: Ldrh_4_merge
-; CHECK-NOT: ldrh
-; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0]
-; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff
-; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16
-; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff
-; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16
-; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]]
-; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]]
-; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]]
-; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]]
-; LE: sub w0, [[TEMP2]], [[WORD2HI]]
-; BE: sub w0, [[TEMP2]], [[WORD2LO]]
-define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
-  %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
-  %l0 = load i16, i16* %arrayidx
-  %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1
-  %l1 = load i16, i16* %arrayidx2
-  %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2
-  %l2 = load i16, i16* %arrayidx7
-  %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
-  %l3 = load i16, i16* %arrayidx12
-  %add4 = sub nuw nsw i16 %l1, %l0
-  %add9 = udiv i16 %add4, %l2
-  %add14 = sub nuw nsw i16 %add9, %l3
-  ret i16 %add14
-}
-
-; CHECK-LABEL: Ldrsh_merge
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-
-define i32 @Ldrsh_merge(i16* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
-  %tmp = load i16, i16* %add.ptr0
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
-  %tmp1 = load i16, i16* %add.ptr
-  %sexttmp = sext i16 %tmp to i32
-  %sexttmp1 = sext i16 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp1, %sexttmp
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsh_zsext_merge
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsh_zsext_merge(i16* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
-  %tmp = load i16, i16* %add.ptr0
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
-  %tmp1 = load i16, i16* %add.ptr
-  %sexttmp = zext i16 %tmp to i32
-  %sexttmp1 = sext i16 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsh_szext_merge
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsh_szext_merge(i16* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
-  %tmp = load i16, i16* %add.ptr0
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
-  %tmp1 = load i16, i16* %add.ptr
-  %sexttmp = sext i16 %tmp to i32
-  %sexttmp1 = zext i16 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldrb_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrb_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = zext i8 %tmp to i32
-  %sexttmp1 = zext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsb_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsb_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = sext i8 %tmp to i32
-  %sexttmp1 = sext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsb_zsext_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsb_zsext_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = zext i8 %tmp to i32
-  %sexttmp1 = sext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsb_szext_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; LE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsb_szext_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = sext i8 %tmp to i32
-  %sexttmp1 = zext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldursh_merge
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: asr  [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursh_merge(i16* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
-  %tmp = load i16, i16* %add.ptr0
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
-  %tmp1 = load i16, i16* %add.ptr
-  %sexttmp = sext i16 %tmp to i32
-  %sexttmp1 = sext i16 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldursh_zsext_merge
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; LE-DAG: lsr  [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: asr  [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursh_zsext_merge(i16* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
-  %tmp = load i16, i16* %add.ptr0
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
-  %tmp1 = load i16, i16* %add.ptr
-  %sexttmp = zext i16 %tmp to i32
-  %sexttmp1 = sext i16 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldursh_szext_merge
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; LE-DAG: asr  [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; BE-DAG: lsr  [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursh_szext_merge(i16* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
-  %tmp = load i16, i16* %add.ptr0
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
-  %tmp1 = load i16, i16* %add.ptr
-  %sexttmp = sext i16 %tmp to i32
-  %sexttmp1 = zext i16 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldurb_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: ubfx  [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldurb_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = zext i8 %tmp to i32
-  %sexttmp1 = zext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldursb_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursb_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = sext i8 %tmp to i32
-  %sexttmp1 = sext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldursb_zsext_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursb_zsext_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = zext i8 %tmp to i32
-  %sexttmp1 = sext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Ldursb_szext_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursb_szext_merge(i8* %p) nounwind {
-  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
-  %tmp = load i8, i8* %add.ptr0
-  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
-  %tmp1 = load i8, i8* %add.ptr
-  %sexttmp = sext i8 %tmp to i32
-  %sexttmp1 = zext i8 %tmp1 to i32
-  %add = sub nsw i32 %sexttmp, %sexttmp1
-  ret i32 %add
-}
-
-; CHECK-LABEL: Strh_zero
-; CHECK: str wzr
-define void @Strh_zero(i16* nocapture %P, i32 %n) {
-entry:
- %idxprom = sext i32 %n to i64
-  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
- store i16 0, i16* %arrayidx
-  %add = add nsw i32 %n, 1
-  %idxprom1 = sext i32 %add to i64
-  %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
-  store i16 0, i16* %arrayidx2
-  ret void
-}
-
-; CHECK-LABEL: Strh_zero_4
-; CHECK: stp wzr, wzr
-define void @Strh_zero_4(i16* nocapture %P, i32 %n) {
-entry:
-  %idxprom = sext i32 %n to i64
-  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
-  store i16 0, i16* %arrayidx
-  %add = add nsw i32 %n, 1
-  %idxprom1 = sext i32 %add to i64
-  %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
-  store i16 0, i16* %arrayidx2
-  %add3 = add nsw i32 %n, 2
-  %idxprom4 = sext i32 %add3 to i64
-  %arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4
-  store i16 0, i16* %arrayidx5
-  %add6 = add nsw i32 %n, 3
-  %idxprom7 = sext i32 %add6 to i64
-  %arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7
-  store i16 0, i16* %arrayidx8
-  ret void
-}
-
-; CHECK-LABEL: Strw_zero
-; CHECK: str xzr
-define void @Strw_zero(i32* nocapture %P, i32 %n) {
-entry:
-  %idxprom = sext i32 %n to i64
-  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
-  store i32 0, i32* %arrayidx
-  %add = add nsw i32 %n, 1
-  %idxprom1 = sext i32 %add to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
-  store i32 0, i32* %arrayidx2
-  ret void
-}
-
-; CHECK-LABEL: Strw_zero_nonzero
-; CHECK: stp wzr, w1
-define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n)  {
-entry:
-  %idxprom = sext i32 %n to i64
-  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
-  store i32 0, i32* %arrayidx
-  %add = add nsw i32 %n, 1
-  %idxprom1 = sext i32 %add to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
-  store i32 %n, i32* %arrayidx2
-  ret void
-}
-
-; CHECK-LABEL: Strw_zero_4
-; CHECK: stp xzr
-define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
-entry:
-  %idxprom = sext i32 %n to i64
-  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
-  store i32 0, i32* %arrayidx
-  %add = add nsw i32 %n, 1
-  %idxprom1 = sext i32 %add to i64
-  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
-  store i32 0, i32* %arrayidx2
-  %add3 = add nsw i32 %n, 2
-  %idxprom4 = sext i32 %add3 to i64
-  %arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
-  store i32 0, i32* %arrayidx5
-  %add6 = add nsw i32 %n, 3
-  %idxprom7 = sext i32 %add6 to i64
-  %arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
-  store i32 0, i32* %arrayidx8
-  ret void
-}
-
-; CHECK-LABEL: Sturb_zero
-; CHECK: sturh wzr
-define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
-entry:
-  %sub = add nsw i32 %n, -2
-  %idxprom = sext i32 %sub to i64
-  %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom
-  store i8 0, i8* %arrayidx
-  %sub2= add nsw i32 %n, -1
-  %idxprom1 = sext i32 %sub2 to i64
-  %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1
-  store i8 0, i8* %arrayidx2
-  ret void
-}
-
-; CHECK-LABEL: Sturh_zero
-; CHECK: stur wzr
-define void @Sturh_zero(i16* nocapture %P, i32 %n) {
-entry:
-  %sub = add nsw i32 %n, -2
-  %idxprom = sext i32 %sub to i64
-  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
-  store i16 0, i16* %arrayidx
-  %sub1 = add nsw i32 %n, -3
-  %idxprom2 = sext i32 %sub1 to i64
-  %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
-  store i16 0, i16* %arrayidx3
-  ret void
-}
-
-; CHECK-LABEL: Sturh_zero_4
-; CHECK: stp wzr, wzr
-define void @Sturh_zero_4(i16* nocapture %P, i32 %n) {
-entry:
-  %sub = add nsw i32 %n, -3
-  %idxprom = sext i32 %sub to i64
-  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
-  store i16 0, i16* %arrayidx
-  %sub1 = add nsw i32 %n, -4
-  %idxprom2 = sext i32 %sub1 to i64
-  %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
-  store i16 0, i16* %arrayidx3
-  %sub4 = add nsw i32 %n, -2
-  %idxprom5 = sext i32 %sub4 to i64
-  %arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5
-  store i16 0, i16* %arrayidx6
-  %sub7 = add nsw i32 %n, -1
-  %idxprom8 = sext i32 %sub7 to i64
-  %arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8
-  store i16 0, i16* %arrayidx9
-  ret void
-}
-
-; CHECK-LABEL: Sturw_zero
-; CHECK: stur xzr
-define void @Sturw_zero(i32* nocapture %P, i32 %n) {
-entry:
-  %sub = add nsw i32 %n, -3
-  %idxprom = sext i32 %sub to i64
-  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
-  store i32 0, i32* %arrayidx
-  %sub1 = add nsw i32 %n, -4
-  %idxprom2 = sext i32 %sub1 to i64
-  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
-  store i32 0, i32* %arrayidx3
-  ret void
-}
-
-; CHECK-LABEL: Sturw_zero_4
-; CHECK: stp xzr, xzr
-define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
-entry:
-  %sub = add nsw i32 %n, -3
-  %idxprom = sext i32 %sub to i64
-  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
-  store i32 0, i32* %arrayidx
-  %sub1 = add nsw i32 %n, -4
-  %idxprom2 = sext i32 %sub1 to i64
-  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
-  store i32 0, i32* %arrayidx3
-  %sub4 = add nsw i32 %n, -2
-  %idxprom5 = sext i32 %sub4 to i64
-  %arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
-  store i32 0, i32* %arrayidx6
-  %sub7 = add nsw i32 %n, -1
-  %idxprom8 = sext i32 %sub7 to i64
-  %arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
-  store i32 0, i32* %arrayidx9
-  ret void
-}
-
diff --git a/test/CodeGen/AArch64/arm64-narrow-st-merge.ll b/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
new file mode 100644
index 000000000000..ec7c227e1699
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
@@ -0,0 +1,209 @@
+; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple aarch64--none-eabi -mattr=+strict-align -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-STRICT
+
+; CHECK-LABEL: Strh_zero
+; CHECK: str wzr
+; CHECK-STRICT-LABEL: Strh_zero
+; CHECK-STRICT: strh wzr
+; CHECK-STRICT: strh wzr
+define void @Strh_zero(i16* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+  store i16 0, i16* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
+  store i16 0, i16* %arrayidx2
+  ret void
+}
+
+; CHECK-LABEL: Strh_zero_4
+; CHECK: stp wzr, wzr
+; CHECK-STRICT-LABEL: Strh_zero_4
+; CHECK-STRICT: strh wzr
+; CHECK-STRICT: strh wzr
+; CHECK-STRICT: strh wzr
+; CHECK-STRICT: strh wzr
+define void @Strh_zero_4(i16* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+  store i16 0, i16* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
+  store i16 0, i16* %arrayidx2
+  %add3 = add nsw i32 %n, 2
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4
+  store i16 0, i16* %arrayidx5
+  %add6 = add nsw i32 %n, 3
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7
+  store i16 0, i16* %arrayidx8
+  ret void
+}
+
+; CHECK-LABEL: Strw_zero
+; CHECK: str xzr
+; CHECK-STRICT-LABEL: Strw_zero
+; CHECK-STRICT: stp wzr, wzr
+define void @Strw_zero(i32* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 0, i32* %arrayidx2
+  ret void
+}
+
+; CHECK-LABEL: Strw_zero_nonzero
+; CHECK: stp wzr, w1
+define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n)  {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 %n, i32* %arrayidx2
+  ret void
+}
+
+; CHECK-LABEL: Strw_zero_4
+; CHECK: stp xzr, xzr
+; CHECK-STRICT-LABEL: Strw_zero_4
+; CHECK-STRICT: stp wzr, wzr
+; CHECK-STRICT: stp wzr, wzr
+define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 0, i32* %arrayidx2
+  %add3 = add nsw i32 %n, 2
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
+  store i32 0, i32* %arrayidx5
+  %add6 = add nsw i32 %n, 3
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
+  store i32 0, i32* %arrayidx8
+  ret void
+}
+
+; CHECK-LABEL: Sturb_zero
+; CHECK: sturh wzr
+; CHECK-STRICT-LABEL: Sturb_zero
+; CHECK-STRICT: sturb wzr
+; CHECK-STRICT: sturb wzr
+define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
+entry:
+  %sub = add nsw i32 %n, -2
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom
+  store i8 0, i8* %arrayidx
+  %sub2= add nsw i32 %n, -1
+  %idxprom1 = sext i32 %sub2 to i64
+  %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1
+  store i8 0, i8* %arrayidx2
+  ret void
+}
+
+; CHECK-LABEL: Sturh_zero
+; CHECK: stur wzr
+; CHECK-STRICT-LABEL: Sturh_zero
+; CHECK-STRICT: sturh wzr
+; CHECK-STRICT: sturh wzr
+define void @Sturh_zero(i16* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -2
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+  store i16 0, i16* %arrayidx
+  %sub1 = add nsw i32 %n, -3
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
+  store i16 0, i16* %arrayidx3
+  ret void
+}
+
+; CHECK-LABEL: Sturh_zero_4
+; CHECK: stp wzr, wzr
+; CHECK-STRICT-LABEL: Sturh_zero_4
+; CHECK-STRICT: sturh wzr
+; CHECK-STRICT: sturh wzr
+; CHECK-STRICT: sturh wzr
+; CHECK-STRICT: sturh wzr
+define void @Sturh_zero_4(i16* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+  store i16 0, i16* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
+  store i16 0, i16* %arrayidx3
+  %sub4 = add nsw i32 %n, -2
+  %idxprom5 = sext i32 %sub4 to i64
+  %arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5
+  store i16 0, i16* %arrayidx6
+  %sub7 = add nsw i32 %n, -1
+  %idxprom8 = sext i32 %sub7 to i64
+  %arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8
+  store i16 0, i16* %arrayidx9
+  ret void
+}
+
+; CHECK-LABEL: Sturw_zero
+; CHECK: stur xzr
+; CHECK-STRICT-LABEL: Sturw_zero
+; CHECK-STRICT: stp wzr, wzr
+define void @Sturw_zero(i32* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+  store i32 0, i32* %arrayidx3
+  ret void
+}
+
+; CHECK-LABEL: Sturw_zero_4
+; CHECK: stp xzr, xzr
+; CHECK-STRICT-LABEL: Sturw_zero_4
+; CHECK-STRICT: stp wzr, wzr
+; CHECK-STRICT: stp wzr, wzr
+define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+  store i32 0, i32* %arrayidx3
+  %sub4 = add nsw i32 %n, -2
+  %idxprom5 = sext i32 %sub4 to i64
+  %arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
+  store i32 0, i32* %arrayidx6
+  %sub7 = add nsw i32 %n, -1
+  %idxprom8 = sext i32 %sub7 to i64
+  %arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
+  store i32 0, i32* %arrayidx9
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll
index 985b5bf483ac..7b2433099031 100644
--- a/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -1,4 +1,6 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s
+; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check.
 
 declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
 
@@ -382,6 +384,10 @@ define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x floa
 ; CHECK-LABEL: test_vfma_lane_f32:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfma_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -394,6 +400,10 @@ define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x flo
 ; CHECK-LABEL: test_vfmaq_lane_f32:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -406,6 +416,10 @@ define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x flo
 ; CHECK-LABEL: test_vfma_laneq_f32:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfma_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -416,6 +430,10 @@ define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x fl
 ; CHECK-LABEL: test_vfmaq_laneq_f32:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -426,6 +444,10 @@ define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x floa
 ; CHECK-LABEL: test_vfms_lane_f32:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfms_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
@@ -437,6 +459,10 @@ define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x flo
 ; CHECK-LABEL: test_vfmsq_lane_f32:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -448,6 +474,10 @@ define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x flo
 ; CHECK-LABEL: test_vfms_laneq_f32:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfms_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
@@ -459,6 +489,10 @@ define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x fl
 ; CHECK-LABEL: test_vfmsq_laneq_f32:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -470,6 +504,10 @@ define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x
 ; CHECK-LABEL: test_vfmaq_lane_f64:
 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_lane_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
@@ -482,6 +520,10 @@ define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x
 ; CHECK-LABEL: test_vfmaq_laneq_f64:
 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_laneq_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1]
+; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
@@ -492,6 +534,10 @@ define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x
 ; CHECK-LABEL: test_vfmsq_lane_f64:
 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_lane_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <1 x double> <double -0.000000e+00>, %v
   %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
@@ -503,6 +549,10 @@ define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x
 ; CHECK-LABEL: test_vfmsq_laneq_f64:
 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_laneq_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1]
+; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
   %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -514,6 +564,9 @@ define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
 ; CHECK-LABEL: test_vfmas_laneq_f32
 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmas_laneq_f32
+; EXYNOS: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; EXYNOS-NEXT: ret
 entry:
   %extract = extractelement <4 x float> %v, i32 3
   %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
@@ -539,6 +592,9 @@ define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmss_lane_f32
 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmss_lane_f32
+; EXYNOS: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; EXYNOS-NEXT: ret
 entry:
   %extract.rhs = extractelement <2 x float> %v, i32 1
   %extract = fsub float -0.000000e+00, %extract.rhs
@@ -561,6 +617,9 @@ define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
 ; CHECK-LABEL: test_vfmsd_laneq_f64
 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsd_laneq_f64
+; EXYNOS: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; EXYNOS-NEXT: ret
 entry:
   %extract.rhs = extractelement <2 x double> %v, i32 1
   %extract = fsub double -0.000000e+00, %extract.rhs
@@ -583,6 +642,9 @@ define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmss_lane_f32_0
 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmss_lane_f32_0
+; EXYNOS: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; EXYNOS-NEXT: ret
 entry:
   %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %tmp1 = extractelement <2 x float> %tmp0, i32 1
@@ -1408,6 +1470,10 @@ define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
   %mul = fmul <2 x float> %shuffle, %a
@@ -1418,6 +1484,9 @@ define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmul_lane_f64:
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_lane_f64:
+; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; EXYNOS-NEXT: ret
 entry:
   %0 = bitcast <1 x double> %a to <8 x i8>
   %1 = bitcast <8 x i8> %0 to double
@@ -1431,6 +1500,10 @@ define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f32:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %mul = fmul <4 x float> %shuffle, %a
@@ -1441,6 +1514,10 @@ define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f64:
 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_lane_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x double> %shuffle, %a
@@ -1451,6 +1528,10 @@ define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmul_laneq_f32:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
   %mul = fmul <2 x float> %shuffle, %a
@@ -1461,6 +1542,9 @@ define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmul_laneq_f64:
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_laneq_f64:
+; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; EXYNOS-NEXT: ret
 entry:
   %0 = bitcast <1 x double> %a to <8 x i8>
   %1 = bitcast <8 x i8> %0 to double
@@ -1474,6 +1558,10 @@ define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmulq_laneq_f32:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %mul = fmul <4 x float> %shuffle, %a
@@ -1484,6 +1572,10 @@ define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmulq_laneq_f64:
 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_laneq_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1]
+; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %mul = fmul <2 x double> %shuffle, %a
@@ -1494,6 +1586,10 @@ define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulx_lane_f32:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulx_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1]
+; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -1504,6 +1600,10 @@ define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f32:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_lane_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
+; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -1514,6 +1614,10 @@ define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f64:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_lane_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -1524,6 +1628,10 @@ define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmulx_laneq_f32:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulx_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3]
+; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -1534,6 +1642,10 @@ define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_laneq_f32:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_laneq_f32:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
+; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -1544,6 +1656,10 @@ define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_laneq_f64:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_laneq_f64:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1]
+; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -1890,6 +2006,10 @@ define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x fl
 ; CHECK-LABEL: test_vfma_lane_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfma_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -1900,6 +2020,10 @@ define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x f
 ; CHECK-LABEL: test_vfmaq_lane_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -1910,6 +2034,10 @@ define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x f
 ; CHECK-LABEL: test_vfma_laneq_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfma_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -1920,6 +2048,10 @@ define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x
 ; CHECK-LABEL: test_vfmaq_laneq_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -1930,6 +2062,10 @@ define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x fl
 ; CHECK-LABEL: test_vfms_lane_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfms_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
@@ -1941,6 +2077,10 @@ define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x f
 ; CHECK-LABEL: test_vfmsq_lane_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
@@ -1952,6 +2092,10 @@ define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x f
 ; CHECK-LABEL: test_vfms_laneq_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfms_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
@@ -1963,6 +2107,10 @@ define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x
 ; CHECK-LABEL: test_vfmsq_laneq_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
@@ -1974,6 +2122,10 @@ define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2
 ; CHECK-LABEL: test_vfmaq_laneq_f64_0:
 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmaq_laneq_f64_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
@@ -1984,6 +2136,10 @@ define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2
 ; CHECK-LABEL: test_vfmsq_laneq_f64_0:
 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vfmsq_laneq_f64_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
   %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
@@ -2787,6 +2943,10 @@ define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x float> %shuffle, %a
@@ -2797,6 +2957,10 @@ define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
   %mul = fmul <4 x float> %shuffle, %a
@@ -2807,6 +2971,10 @@ define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmul_laneq_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x float> %shuffle, %a
@@ -2817,6 +2985,9 @@ define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmul_laneq_f64_0:
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmul_laneq_f64_0:
+; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+; EXYNOS-NEXT: ret
 entry:
   %0 = bitcast <1 x double> %a to <8 x i8>
   %1 = bitcast <8 x i8> %0 to double
@@ -2830,6 +3001,10 @@ define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmulq_laneq_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   %mul = fmul <4 x float> %shuffle, %a
@@ -2840,6 +3015,10 @@ define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmulq_laneq_f64_0:
 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulq_laneq_f64_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x double> %shuffle, %a
@@ -2850,6 +3029,10 @@ define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulx_lane_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulx_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -2860,6 +3043,10 @@ define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_lane_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -2870,6 +3057,10 @@ define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f64_0:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_lane_f64_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -2880,6 +3071,10 @@ define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmulx_laneq_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulx_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0]
+; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -2890,6 +3085,10 @@ define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_laneq_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_laneq_f32_0:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0]
+; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -2900,9 +3099,51 @@ define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_laneq_f64_0:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 ; CHECK-NEXT: ret
+; EXYNOS-LABEL: test_vmulxq_laneq_f64_0:
+; EXYNOS: dup  [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0]
+; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d
+; EXYNOS-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
   ret <2 x double> %vmulx2.i
 }
 
+define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
+; CHECK-LABEL: optimize_dup:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+; EXYNOS-LABEL: optimize_dup:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS-NEXT: ret
+entry: ; Both splats below select lane 3 of %v; the EXYNOS checks above expect a single dup ([[x]]) feeding both fmla and fmls.
+  %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; splat of %v lane 3
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) ; %lane1 * %b + %a
+  %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; same lane as %lane1
+	%1 = fmul <4 x float> %lane2, %c
+	%s = fsub <4 x float> %0, %1
+  ret <4 x float> %s
+}
+
+define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
+; CHECK-LABEL: no_optimize_dup:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+; EXYNOS-LABEL: no_optimize_dup:
+; EXYNOS: dup  [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3]
+; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s
+; EXYNOS: dup  [[y:v[0-9]+]].4s, {{v[0-9]+}}.s[1]
+; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[y]].4s
+; EXYNOS-NEXT: ret
+entry: ; The splats below select different lanes of %v (3 and 1); the EXYNOS checks above expect a separate dup for each ([[x]] and [[y]]).
+  %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; splat of %v lane 3
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) ; %lane1 * %b + %a
+  %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat of %v lane 1, not lane 3
+	%1 = fmul <4 x float> %lane2, %c
+	%s = fsub <4 x float> %0, %1
+  ret <4 x float> %s
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-add-sub.ll b/test/CodeGen/AArch64/arm64-neon-add-sub.ll
index fbde606538ca..40836a73e0ca 100644
--- a/test/CodeGen/AArch64/arm64-neon-add-sub.ll
+++ b/test/CodeGen/AArch64/arm64-neon-add-sub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -aarch64-simd-scalar| FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -aarch64-enable-simd-scalar| FileCheck %s
 
 define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
 ;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
diff --git a/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index 51ed8a13cd2e..45dba479ccc4 100644
--- a/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V8a
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a
-; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V8a
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple
 
 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
index caf4498276ce..f68a9debd5f2 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
@@ -13,7 +13,7 @@
 define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
 ; CHECK-LABEL: jscall_patchpoint_codegen:
-; CHECK:       Ltmp
+; CHECK:       Lcfi
 ; CHECK:       str x{{.+}}, [sp]
 ; CHECK-NEXT:  mov  x0, x{{.+}}
 ; CHECK:       Ltmp
@@ -22,7 +22,7 @@ entry:
 ; CHECK:  movk  x16, #48879
 ; CHECK-NEXT:  blr x16
 ; FAST-LABEL:  jscall_patchpoint_codegen:
-; FAST:        Ltmp
+; FAST:        Lcfi
 ; FAST:        str x{{.+}}, [sp]
 ; FAST:        Ltmp
 ; FAST-NEXT:   mov   x16, #281470681743360
@@ -40,7 +40,7 @@ entry:
 define i64 @jscall_patchpoint_codegen2(i64 %callee) {
 entry:
 ; CHECK-LABEL: jscall_patchpoint_codegen2:
-; CHECK:       Ltmp
+; CHECK:       Lcfi
 ; CHECK:       orr w[[REG:[0-9]+]], wzr, #0x6
 ; CHECK-NEXT:  str x[[REG]], [sp, #24]
 ; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x4
@@ -53,7 +53,7 @@ entry:
 ; CHECK-NEXT:  movk  x16, #48879
 ; CHECK-NEXT:  blr x16
 ; FAST-LABEL:  jscall_patchpoint_codegen2:
-; FAST:        Ltmp
+; FAST:        Lcfi
 ; FAST:        orr [[REG1:x[0-9]+]], xzr, #0x2
 ; FAST-NEXT:   orr [[REG2:w[0-9]+]], wzr, #0x4
 ; FAST-NEXT:   orr [[REG3:x[0-9]+]], xzr, #0x6
@@ -74,7 +74,7 @@ entry:
 define i64 @jscall_patchpoint_codegen3(i64 %callee) {
 entry:
 ; CHECK-LABEL: jscall_patchpoint_codegen3:
-; CHECK:       Ltmp
+; CHECK:       Lcfi
 ; CHECK:       mov  w[[REG:[0-9]+]], #10
 ; CHECK-NEXT:  str x[[REG]], [sp, #48]
 ; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x8
@@ -91,7 +91,7 @@ entry:
 ; CHECK-NEXT:  movk  x16, #48879
 ; CHECK-NEXT:  blr x16
 ; FAST-LABEL:  jscall_patchpoint_codegen3:
-; FAST:        Ltmp
+; FAST:        Lcfi
 ; FAST:        orr [[REG1:x[0-9]+]], xzr, #0x2
 ; FAST-NEXT:   orr [[REG2:w[0-9]+]], wzr, #0x4
 ; FAST-NEXT:   orr [[REG3:x[0-9]+]], xzr, #0x6
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll
index 9ee53a0f92e6..6fb42b279447 100644
--- a/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -march=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
 
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
diff --git a/test/CodeGen/AArch64/arm64-prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll
index bdeacb231fdd..733ba94b110f 100644
--- a/test/CodeGen/AArch64/arm64-prefetch.ll
+++ b/test/CodeGen/AArch64/arm64-prefetch.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -march arm64 -o - | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 @a = common global i32* null, align 8
 
diff --git a/test/CodeGen/AArch64/arm64-promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll
index 0be2f5c08c00..2b7c782947f1 100644
--- a/test/CodeGen/AArch64/arm64-promote-const.ll
+++ b/test/CodeGen/AArch64/arm64-promote-const.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-stress-promote-const -mcpu=cyclone | FileCheck -check-prefix=PROMOTED %s
 ; The REGULAR run just checks that the inputs passed to promote const expose
 ; the appropriate patterns.
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-enable-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s
 
 %struct.uint8x16x4_t = type { [4 x <16 x i8>] }
 
diff --git a/test/CodeGen/AArch64/arm64-redzone.ll b/test/CodeGen/AArch64/arm64-redzone.ll
index 837249cb26c6..dcb839f4cdd0 100644
--- a/test/CodeGen/AArch64/arm64-redzone.ll
+++ b/test/CodeGen/AArch64/arm64-redzone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-redzone | FileCheck %s
 
 define i32 @foo(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-LABEL: foo:
diff --git a/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll b/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll
index a1daf03f4fa9..cf93e0e8e698 100644
--- a/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll
+++ b/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
 
 ; We used to not mark NZCV as being used in the continuation basic-block
 ; when lowering a 128-bit "select" to branches. This meant a subsequent use
diff --git a/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
index d376aaf56817..d4814dc62609 100644
--- a/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
+++ b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -o - %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 ; This is mostly a "don't assert" test. The type of the RHS of a shift depended
 ; on the phase of legalization, which led to the creation of an unexpected and
diff --git a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
index 3948c0457bcd..bda025af5193 100644
--- a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
+++ b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
@@ -1,4 +1,3 @@
-# RUN: rm -f %S/arm64-regress-opt-cmp.s
 # RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s 2>&1 | FileCheck %s
 # CHECK: %1 = ANDWri {{.*}}
 # CHECK-NEXT: %wzr = SUBSWri {{.*}}
diff --git a/test/CodeGen/AArch64/arm64-return-vector.ll b/test/CodeGen/AArch64/arm64-return-vector.ll
index 3262c91c04df..2167c6664b9e 100644
--- a/test/CodeGen/AArch64/arm64-return-vector.ll
+++ b/test/CodeGen/AArch64/arm64-return-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 ; 2x64 vector should be returned in Q0.
 
diff --git a/test/CodeGen/AArch64/arm64-returnaddr.ll b/test/CodeGen/AArch64/arm64-returnaddr.ll
index 285b29563c09..1e0ec5b2e5a1 100644
--- a/test/CodeGen/AArch64/arm64-returnaddr.ll
+++ b/test/CodeGen/AArch64/arm64-returnaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define i8* @rt0(i32 %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-rev.ll b/test/CodeGen/AArch64/arm64-rev.ll
index 4980d7e3b275..1ce5ab44e292 100644
--- a/test/CodeGen/AArch64/arm64-rev.ll
+++ b/test/CodeGen/AArch64/arm64-rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define i32 @test_rev_w(i32 %a) nounwind {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-scvt.ll b/test/CodeGen/AArch64/arm64-scvt.ll
index fc64d7bfda68..4697e1feff4b 100644
--- a/test/CodeGen/AArch64/arm64-scvt.ll
+++ b/test/CodeGen/AArch64/arm64-scvt.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
 ; rdar://13082402
 
 define float @t1(i32* nocapture %src) nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-shifted-sext.ll b/test/CodeGen/AArch64/arm64-shifted-sext.ll
index 71f15b1222b2..cbdf6d3dd30a 100644
--- a/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
 ;
 ; <rdar://problem/13820218>
 
diff --git a/test/CodeGen/AArch64/arm64-shrink-v1i64.ll b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
index f31a5702761c..3e926d427403 100644
--- a/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
+++ b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 < %s
+; RUN: llc < %s -mtriple=arm64-eabi
 
 ; The DAGCombiner tries to do following shrink:
 ;     Convert x+y to (VT)((SmallVT)x+(SmallVT)y)
diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 16ae7ef8e1b7..255cd8e4a0d3 100644
--- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -78,8 +78,8 @@ declare i32 @doSomething(i32, i32*)
 ; Next BB.
 ; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body
 ; CHECK: bl _something
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: cbnz [[IV]], [[LOOP]]
 ;
 ; Next BB.
@@ -144,8 +144,8 @@ declare i32 @something(...)
 ; Next BB.
 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
 ; CHECK: bl _something
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
 ; Next BB.
 ; CHECK: ; %for.end
@@ -188,8 +188,8 @@ for.end:                                          ; preds = %for.body
 ;
 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
 ; CHECK: bl _something
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
 ; Next BB.
 ; CHECK: bl _somethingElse
@@ -259,8 +259,8 @@ declare void @somethingElse(...)
 ;
 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
 ; CHECK: bl _something
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
 ; Next BB.
 ; CHECK: lsl w0, [[SUM]], #3
@@ -333,32 +333,32 @@ entry:
 ;
 ; Sum is merged with the returned register.
 ; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
-; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
 ; CHECK-NEXT: cmp w1, #1
+; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
+; CHECK-NEXT: mov [[SUM:w0]], wzr
 ; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
-; CHECK: mov [[SUM:w0]], wzr
 ;
 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
 ; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
 ; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8
 ; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8]
 ; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]]
-; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
 ; CHECK-NEXT: sub w1, w1, #1
+; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
 ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
+; DISABLE-NEXT: b [[IFEND_LABEL]]
 ;
-; DISABLE-NEXT: b
 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
 ; DISABLE: lsl w0, w1, #1
 ;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; ENABLE: lsl w0, w1, #1
-; ENABLE-NEXT: ret
-;
 ; CHECK: [[IFEND_LABEL]]:
 ; Epilogue code.
 ; CHECK: add sp, sp, #16
 ; CHECK-NEXT: ret
+;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE-NEXT: lsl w0, w1, #1
+; ENABLE-NEXT: ret
 define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
 entry:
   %ap = alloca i8*, align 8
@@ -413,9 +413,9 @@ declare void @llvm.va_end(i8*)
 ;
 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
 ; Inline asm statement.
-; CHECK: add x19, x19, #1
 ; CHECK: sub [[IV]], [[IV]], #1
-; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
+; CHECK: add x19, x19, #1
+; CHECK: cbnz [[IV]], [[LOOP_LABEL]]
 ; Next BB.
 ; CHECK: mov w0, wzr
 ; Epilogue code.
@@ -508,8 +508,7 @@ declare i32 @someVariadicFunc(i32, ...)
 ; CHECK-LABEL: noreturn:
 ; DISABLE: stp
 ;
-; CHECK: and [[TEST:w[0-9]+]], w0, #0xff
-; CHECK-NEXT: cbnz [[TEST]], [[ABORT:LBB[0-9_]+]]
+; CHECK: cbnz w0, [[ABORT:LBB[0-9_]+]]
 ;
 ; CHECK: mov w0, #42
 ;
diff --git a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll
index aed39e7ed8cb..e72c2b7989d2 100644
--- a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll
+++ b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST
 
 define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp {
 ; CHECK: uaddlv.16b h0, v0
diff --git a/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
index 21131657820f..269282cd473c 100644
--- a/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
+++ b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -o -  %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 ; ARM64ISelLowering.cpp was creating a new (floating-point) load for efficiency
 ; but not updating chain-successors of the old one. As a result, the two memory
diff --git a/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index 7fec53993bc1..b26542d759e4 100644
--- a/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -aarch64-shift-insert-generation=true -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
 ; CHECK-LABEL: testLeftGood:
diff --git a/test/CodeGen/AArch64/arm64-smaxv.ll b/test/CodeGen/AArch64/arm64-smaxv.ll
index 8cc4502f6caa..fc975f352365 100644
--- a/test/CodeGen/AArch64/arm64-smaxv.ll
+++ b/test/CodeGen/AArch64/arm64-smaxv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
 ; CHECK: test_vmaxv_s8
diff --git a/test/CodeGen/AArch64/arm64-sminv.ll b/test/CodeGen/AArch64/arm64-sminv.ll
index c1650b5fb294..c721b0d5f324 100644
--- a/test/CodeGen/AArch64/arm64-sminv.ll
+++ b/test/CodeGen/AArch64/arm64-sminv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define signext i8 @test_vminv_s8(<8 x i8> %a1) {
 ; CHECK: test_vminv_s8
diff --git a/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
index 3949b85fbd32..79ed067d9ad4 100644
--- a/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
+++ b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -march=arm64 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi | FileCheck %s
 
 ; Check if sqshl/uqshl with constant shift amout can be selected. 
 define i64 @test_vqshld_s64_i(i64 %a) {
diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll
index 0387a91ea0e8..28ee8fcf46fc 100644
--- a/test/CodeGen/AArch64/arm64-st1.ll
+++ b/test/CodeGen/AArch64/arm64-st1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
 
 define void @st1lane_16b(<16 x i8> %A, i8* %D) {
 ; CHECK-LABEL: st1lane_16b
diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 3eb1d2753001..0b2e9776263d 100644
--- a/test/CodeGen/AArch64/arm64-stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 2
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -23,26 +23,37 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; Functions and stack size
 ; CHECK-NEXT:   .quad _constantargs
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _osrinline
 ; CHECK-NEXT:   .quad 32
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _osrcold
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _propertyRead
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _propertyWrite
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _jsVoidCall
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _jsIntCall
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _spilledValue
 ; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _spilledStackMapValue
 ; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _liveConstant
 ; CHECK-NEXT:   .quad 16
+; CHECK-NEXT:   .quad 1
 ; CHECK-NEXT:   .quad _clobberLR
 ; CHECK-NEXT:   .quad 112
+; CHECK-NEXT:   .quad 1
 
 ; Num LargeConstants
 ; CHECK-NEXT:   .quad   4294967295
diff --git a/test/CodeGen/AArch64/arm64-stp-aa.ll b/test/CodeGen/AArch64/arm64-stp-aa.ll
index 2a45745fedb5..5b34017cf36a 100644
--- a/test/CodeGen/AArch64/arm64-stp-aa.ll
+++ b/test/CodeGen/AArch64/arm64-stp-aa.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -enable-misched=false -aarch64-stp-suppress=false -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -enable-misched=false -aarch64-enable-stp-suppress=false -verify-machineinstrs | FileCheck %s
 
 ; The next set of tests makes sure we can combine the second instruction into
 ; the first.
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
index 5664c7d118c3..cc4591c8aece 100644
--- a/test/CodeGen/AArch64/arm64-stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
 
 ; CHECK-LABEL: stp_int
 ; CHECK: stp w0, w1, [x2]
@@ -98,6 +98,51 @@ entry:
   ret void
 }
 
+; Check that a store of a vector built by 4 insertelements of %v that is not
+; a splat (the first insert uses the run-time index %v) does not get split.
+define void @nosplat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: nosplat_v4i32:
+; CHECK: str w0,
+; CHECK: ldr q[[REG1:[0-9]+]],
+; CHECK-DAG: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+  %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v ; index operand is %v itself, not a constant lane
+  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+  %p21 = bitcast i32* %p to <4 x i32>* ; reinterpret %p so the whole vector is stored with one store
+  store <4 x i32> %p20, <4 x i32>* %p21, align 4
+  ret void
+}
+
+; Check that a store of a non-splat vector built by inserting %v into lanes
+; 1-3 of the incoming vector %vin (lane 0 is left as-is) does not get split.
+define void @nosplat2_v4i32(i32 %v, i32 *%p, <4 x i32> %vin) {
+entry: ; REG1 is captured at its first use below so these checks do not rely on a value bound while matching an earlier function.
+
+; CHECK-LABEL: nosplat2_v4i32:
+; CHECK: ins v[[REG1:[0-9]+]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+  %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1 ; start from %vin, so lane 0 keeps the caller's value
+  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+  %p21 = bitcast i32* %p to <4 x i32>* ; reinterpret %p so the whole vector is stored with one store
+  store <4 x i32> %p20, <4 x i32>* %p21, align 4
+  ret void
+}
+
 ; Read of %b to compute %tmp2 shouldn't prevent formation of stp
 ; CHECK-LABEL: stp_int_rar_hazard
 ; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
diff --git a/test/CodeGen/AArch64/arm64-stur.ll b/test/CodeGen/AArch64/arm64-stur.ll
index 5f4cb9f3d95a..4a3229a39b50 100644
--- a/test/CodeGen/AArch64/arm64-stur.ll
+++ b/test/CodeGen/AArch64/arm64-stur.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
 %struct.X = type <{ i32, i64, i64 }>
 
 define void @foo1(i32* %p, i64 %val) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-subsections.ll b/test/CodeGen/AArch64/arm64-subsections.ll
index 316e7c3a8ebd..1449b857ec6d 100644
--- a/test/CodeGen/AArch64/arm64-subsections.ll
+++ b/test/CodeGen/AArch64/arm64-subsections.ll
@@ -2,4 +2,4 @@
 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ELF
 
 ; CHECK-MACHO: .subsections_via_symbols
-; CHECK-ELF-NOT: .subsections_via_symbols
-\ No newline at end of file
+; CHECK-ELF-NOT: .subsections_via_symbols
diff --git a/test/CodeGen/AArch64/arm64-subvector-extend.ll b/test/CodeGen/AArch64/arm64-subvector-extend.ll
index d5a178a9e656..2bc64aa8d644 100644
--- a/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 ; Test efficient codegen of vector extends up from legal type to 128 bit
 ; and 256 bit vector types.
diff --git a/test/CodeGen/AArch64/arm64-tbl.ll b/test/CodeGen/AArch64/arm64-tbl.ll
index b1ce15a1e19a..d1b54b8a6264 100644
--- a/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/test/CodeGen/AArch64/arm64-tbl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
 ; CHECK: tbl1_8b
diff --git a/test/CodeGen/AArch64/arm64-this-return.ll b/test/CodeGen/AArch64/arm64-this-return.ll
index 9fc68f476b77..177f442052f5 100644
--- a/test/CodeGen/AArch64/arm64-this-return.ll
+++ b/test/CodeGen/AArch64/arm64-this-return.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-this-return-forwarding | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 %struct.A = type { i8 }
 %struct.B = type { i32 }
diff --git a/test/CodeGen/AArch64/arm64-trap.ll b/test/CodeGen/AArch64/arm64-trap.ll
index 5e99c32c57b3..eb06bddecc13 100644
--- a/test/CodeGen/AArch64/arm64-trap.ll
+++ b/test/CodeGen/AArch64/arm64-trap.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 define void @foo() nounwind {
 ; CHECK: foo
 ; CHECK: brk #0x1
diff --git a/test/CodeGen/AArch64/arm64-trn.ll b/test/CodeGen/AArch64/arm64-trn.ll
index 92ccf05a3c94..f73cb8d3095f 100644
--- a/test/CodeGen/AArch64/arm64-trn.ll
+++ b/test/CodeGen/AArch64/arm64-trn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vtrni8:
diff --git a/test/CodeGen/AArch64/arm64-umaxv.ll b/test/CodeGen/AArch64/arm64-umaxv.ll
index a77f228cb156..c60489364275 100644
--- a/test/CodeGen/AArch64/arm64-umaxv.ll
+++ b/test/CodeGen/AArch64/arm64-umaxv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp {
 ; CHECK-LABEL: vmax_u8x8:
diff --git a/test/CodeGen/AArch64/arm64-uminv.ll b/test/CodeGen/AArch64/arm64-uminv.ll
index 2181db46ea96..124e7969f6be 100644
--- a/test/CodeGen/AArch64/arm64-uminv.ll
+++ b/test/CodeGen/AArch64/arm64-uminv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp {
 ; CHECK-LABEL: vmin_u8x8:
diff --git a/test/CodeGen/AArch64/arm64-umov.ll b/test/CodeGen/AArch64/arm64-umov.ll
index a1ef9908646a..d9fa54fa83bc 100644
--- a/test/CodeGen/AArch64/arm64-umov.ll
+++ b/test/CodeGen/AArch64/arm64-umov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define zeroext i8 @f1(<16 x i8> %a) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/AArch64/arm64-unaligned_ldst.ll b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
index dab8b0f5b6d1..20093e587bc3 100644
--- a/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
+++ b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 ; rdar://r11231896
 
 define void @t1(i8* nocapture %a, i8* nocapture %b) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-uzp.ll b/test/CodeGen/AArch64/arm64-uzp.ll
index 517ebae6dabd..0ffd91971697 100644
--- a/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/test/CodeGen/AArch64/arm64-uzp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vuzpi8:
diff --git a/test/CodeGen/AArch64/arm64-vaargs.ll b/test/CodeGen/AArch64/arm64-vaargs.ll
index ce07635a5c87..47dea611bc7e 100644
--- a/test/CodeGen/AArch64/arm64-vaargs.ll
+++ b/test/CodeGen/AArch64/arm64-vaargs.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin11.0.0 | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
-target triple = "arm64-apple-darwin11.0.0"
 
 define float @t1(i8* nocapture %fmt, ...) nounwind ssp {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index c1800085884c..c7b0c33550d0 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 
 define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-vadd.ll b/test/CodeGen/AArch64/arm64-vadd.ll
index e3d8dd256956..9d09251524ea 100644
--- a/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: addhn8b:
diff --git a/test/CodeGen/AArch64/arm64-vaddlv.ll b/test/CodeGen/AArch64/arm64-vaddlv.ll
index 2d6413812ec8..903a9e9b5010 100644
--- a/test/CodeGen/AArch64/arm64-vaddlv.ll
+++ b/test/CodeGen/AArch64/arm64-vaddlv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone {
 ; CHECK: test_vaddlv_s32
diff --git a/test/CodeGen/AArch64/arm64-vaddv.ll b/test/CodeGen/AArch64/arm64-vaddv.ll
index 589319bb3227..55dbebf0c9fe 100644
--- a/test/CodeGen/AArch64/arm64-vaddv.ll
+++ b/test/CodeGen/AArch64/arm64-vaddv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -asm-verbose=false -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false -mcpu=cyclone | FileCheck %s
 
 define signext i8 @test_vaddv_s8(<8 x i8> %a1) {
 ; CHECK-LABEL: test_vaddv_s8:
diff --git a/test/CodeGen/AArch64/arm64-vbitwise.ll b/test/CodeGen/AArch64/arm64-vbitwise.ll
index 9cfcaafe9491..34d3570f4c6d 100644
--- a/test/CodeGen/AArch64/arm64-vbitwise.ll
+++ b/test/CodeGen/AArch64/arm64-vbitwise.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: rbit_8b:
diff --git a/test/CodeGen/AArch64/arm64-vclz.ll b/test/CodeGen/AArch64/arm64-vclz.ll
index 10118f0d5638..016df56531f3 100644
--- a/test/CodeGen/AArch64/arm64-vclz.ll
+++ b/test/CodeGen/AArch64/arm64-vclz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_u8:
diff --git a/test/CodeGen/AArch64/arm64-vcmp.ll b/test/CodeGen/AArch64/arm64-vcmp.ll
index 1b33eb58e86f..167cef9218a3 100644
--- a/test/CodeGen/AArch64/arm64-vcmp.ll
+++ b/test/CodeGen/AArch64/arm64-vcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 
 define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-vcnt.ll b/test/CodeGen/AArch64/arm64-vcnt.ll
index 5cff10cb8d16..4e8147cb806a 100644
--- a/test/CodeGen/AArch64/arm64-vcnt.ll
+++ b/test/CodeGen/AArch64/arm64-vcnt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: cls_8b:
diff --git a/test/CodeGen/AArch64/arm64-vcombine.ll b/test/CodeGen/AArch64/arm64-vcombine.ll
index fa1299603af3..7e0b5803a951 100644
--- a/test/CodeGen/AArch64/arm64-vcombine.ll
+++ b/test/CodeGen/AArch64/arm64-vcombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ; LowerCONCAT_VECTORS() was reversing the order of two parts.
 ; rdar://11558157
diff --git a/test/CodeGen/AArch64/arm64-vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll
index 13d2d288b2c4..f7437bc27ec2 100644
--- a/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
 ;CHECK-LABEL: fcvtas_2s:
diff --git a/test/CodeGen/AArch64/arm64-vcvt_f.ll b/test/CodeGen/AArch64/arm64-vcvt_f.ll
index 1f393c21a1a1..254671a3c3c5 100644
--- a/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
 ; CHECK-LABEL: test_vcvt_f64_f32:
diff --git a/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
index 1eb7b43d5755..310dc711fdc2 100644
--- a/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x float> @ucvt(<2 x i32> %a) nounwind readnone ssp {
 ; CHECK-LABEL: ucvt:
diff --git a/test/CodeGen/AArch64/arm64-vcvt_n.ll b/test/CodeGen/AArch64/arm64-vcvt_n.ll
index 7ed5be6e8af9..c2380a390577 100644
--- a/test/CodeGen/AArch64/arm64-vcvt_n.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt_n.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp {
 ; CHECK-LABEL: cvtf32fxpu:
diff --git a/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll
index 985a5f762439..a8a671b7bbd4 100644
--- a/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x i32> @c1(<2 x float> %a) nounwind readnone ssp {
 ; CHECK: c1
diff --git a/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll
index b29c22cbfda5..845b8cb9a1fe 100644
--- a/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll
+++ b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define float @fcvtxn(double %a) {
 ; CHECK-LABEL: fcvtxn:
diff --git a/test/CodeGen/AArch64/arm64-vecCmpBr.ll b/test/CodeGen/AArch64/arm64-vecCmpBr.ll
index 0c496fedfc2a..e49810ceabf2 100644
--- a/test/CodeGen/AArch64/arm64-vecCmpBr.ll
+++ b/test/CodeGen/AArch64/arm64-vecCmpBr.ll
@@ -1,7 +1,6 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios3.0.0 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
 ; ModuleID = 'arm64_vecCmpBr.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios3.0.0"
 
 
 define i32 @anyZero64(<4 x i16> %a) #0 {
diff --git a/test/CodeGen/AArch64/arm64-vecFold.ll b/test/CodeGen/AArch64/arm64-vecFold.ll
index aeacfccab3c4..3123546b24ff 100644
--- a/test/CodeGen/AArch64/arm64-vecFold.ll
+++ b/test/CodeGen/AArch64/arm64-vecFold.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -o - %s| FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <16 x i8> @foov16i8(<8 x i16> %a0, <8 x i16> %b0) nounwind readnone ssp {
 ; CHECK-LABEL: foov16i8:
diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll
index 241c3dcb9825..68892eeacf37 100644
--- a/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ;CHECK: @func30
 ;CHECK: movi.4h v1, #1
diff --git a/test/CodeGen/AArch64/arm64-vector-imm.ll b/test/CodeGen/AArch64/arm64-vector-imm.ll
index aa3ffd261d4b..0a8087417252 100644
--- a/test/CodeGen/AArch64/arm64-vector-imm.ll
+++ b/test/CodeGen/AArch64/arm64-vector-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
 ; CHECK-LABEL: v_orrimm:
diff --git a/test/CodeGen/AArch64/arm64-vector-insertion.ll b/test/CodeGen/AArch64/arm64-vector-insertion.ll
index 8fbff71f9fc2..b10af31d5e1f 100644
--- a/test/CodeGen/AArch64/arm64-vector-insertion.ll
+++ b/test/CodeGen/AArch64/arm64-vector-insertion.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mcpu=generic -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -mcpu=generic -aarch64-neon-syntax=apple | FileCheck %s
 
 define void @test0f(float* nocapture %x, float %a) #0 {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-vector-ldst.ll b/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 26b9d62c8f6a..938b3d1d0593 100644
--- a/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
 
 ; rdar://9428579
 
diff --git a/test/CodeGen/AArch64/arm64-vext.ll b/test/CodeGen/AArch64/arm64-vext.ll
index fa57eeb246cc..b315e4c409b0 100644
--- a/test/CodeGen/AArch64/arm64-vext.ll
+++ b/test/CodeGen/AArch64/arm64-vext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define void @test_vext_s8() nounwind ssp {
   ; CHECK-LABEL: test_vext_s8:
diff --git a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index 255a18216de5..24537477c4cc 100644
--- a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ;;; Float vectors
 
diff --git a/test/CodeGen/AArch64/arm64-vhadd.ll b/test/CodeGen/AArch64/arm64-vhadd.ll
index 2e82b2a72541..cd650e1debf8 100644
--- a/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: shadd8b:
diff --git a/test/CodeGen/AArch64/arm64-vhsub.ll b/test/CodeGen/AArch64/arm64-vhsub.ll
index e50fd3d35896..b2ee87f1e3fb 100644
--- a/test/CodeGen/AArch64/arm64-vhsub.ll
+++ b/test/CodeGen/AArch64/arm64-vhsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: shsub8b:
diff --git a/test/CodeGen/AArch64/arm64-vmax.ll b/test/CodeGen/AArch64/arm64-vmax.ll
index 7e363231b360..e02222836144 100644
--- a/test/CodeGen/AArch64/arm64-vmax.ll
+++ b/test/CodeGen/AArch64/arm64-vmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: smax_8b:
@@ -244,7 +244,7 @@ declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind r
 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: smaxp_8b:
@@ -368,7 +368,7 @@ declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind
 declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sminp_8b:
diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
index 302ba9d681c6..b9cd1bec1774 100644
--- a/test/CodeGen/AArch64/arm64-vminmaxnm.ll
+++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
 ; CHECK: fmaxnm.2s	v0, v0, v1
diff --git a/test/CodeGen/AArch64/arm64-vmovn.ll b/test/CodeGen/AArch64/arm64-vmovn.ll
index 67e2816a7f5f..8e8642f90f13 100644
--- a/test/CodeGen/AArch64/arm64-vmovn.ll
+++ b/test/CodeGen/AArch64/arm64-vmovn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @xtn8b(<8 x i16> %A) nounwind {
 ;CHECK-LABEL: xtn8b:
diff --git a/test/CodeGen/AArch64/arm64-vmul.ll b/test/CodeGen/AArch64/arm64-vmul.ll
index 3df847ec3748..a5fa78abb92f 100644
--- a/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/test/CodeGen/AArch64/arm64-vmul.ll
@@ -1,4 +1,4 @@
-; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 
 define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-volatile.ll b/test/CodeGen/AArch64/arm64-volatile.ll
index 28facb6da7c6..66ecd6a3583d 100644
--- a/test/CodeGen/AArch64/arm64-volatile.ll
+++ b/test/CodeGen/AArch64/arm64-volatile.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 define i64 @normal_load(i64* nocapture %bar) nounwind readonly {
 ; CHECK: normal_load
 ; CHECK: ldp
diff --git a/test/CodeGen/AArch64/arm64-vpopcnt.ll b/test/CodeGen/AArch64/arm64-vpopcnt.ll
index 25306eba4917..4fb73ca4805d 100644
--- a/test/CodeGen/AArch64/arm64-vpopcnt.ll
+++ b/test/CodeGen/AArch64/arm64-vpopcnt.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
-target triple = "arm64-apple-ios"
+; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s
 
 ; The non-byte ones used to fail with "Cannot select"
 
diff --git a/test/CodeGen/AArch64/arm64-vqadd.ll b/test/CodeGen/AArch64/arm64-vqadd.ll
index 9932899c6424..b7d61056ad9b 100644
--- a/test/CodeGen/AArch64/arm64-vqadd.ll
+++ b/test/CodeGen/AArch64/arm64-vqadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqadd8b:
diff --git a/test/CodeGen/AArch64/arm64-vqsub.ll b/test/CodeGen/AArch64/arm64-vqsub.ll
index 4fc588d689f9..77aac59d1419 100644
--- a/test/CodeGen/AArch64/arm64-vqsub.ll
+++ b/test/CodeGen/AArch64/arm64-vqsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqsub8b:
diff --git a/test/CodeGen/AArch64/arm64-vselect.ll b/test/CodeGen/AArch64/arm64-vselect.ll
index 9988512f530e..e48f2b29b913 100644
--- a/test/CodeGen/AArch64/arm64-vselect.ll
+++ b/test/CodeGen/AArch64/arm64-vselect.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ;CHECK: @func63
 ;CHECK: cmeq.4h v0, v0, v1
diff --git a/test/CodeGen/AArch64/arm64-vsetcc_fp.ll b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll
index f4f4714dde4d..32e24832d8aa 100644
--- a/test/CodeGen/AArch64/arm64-vsetcc_fp.ll
+++ b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 define <2 x i32> @fcmp_one(<2 x float> %x, <2 x float> %y) nounwind optsize readnone {
 ; CHECK-LABEL: fcmp_one:
 ; CHECK-NEXT: fcmgt.2s [[REG:v[0-9]+]], v0, v1
diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll
index b5a6788979e2..c1c4649bd6a4 100644
--- a/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
 
 define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqshl8b:
diff --git a/test/CodeGen/AArch64/arm64-vshr.ll b/test/CodeGen/AArch64/arm64-vshr.ll
index 8d263f22c54e..6d599ccd6fc5 100644
--- a/test/CodeGen/AArch64/arm64-vshr.ll
+++ b/test/CodeGen/AArch64/arm64-vshr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
 
 define <8 x i16> @testShiftRightArith_v8i16(<8 x i16> %a, <8 x i16> %b) #0 {
 ; CHECK-LABEL: testShiftRightArith_v8i16:
diff --git a/test/CodeGen/AArch64/arm64-vsqrt.ll b/test/CodeGen/AArch64/arm64-vsqrt.ll
index 20aebd9cae36..5052f60f2cee 100644
--- a/test/CodeGen/AArch64/arm64-vsqrt.ll
+++ b/test/CodeGen/AArch64/arm64-vsqrt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: frecps_2s:
diff --git a/test/CodeGen/AArch64/arm64-vsra.ll b/test/CodeGen/AArch64/arm64-vsra.ll
index d480dfe1f7d8..15364f4001cb 100644
--- a/test/CodeGen/AArch64/arm64-vsra.ll
+++ b/test/CodeGen/AArch64/arm64-vsra.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsras8:
diff --git a/test/CodeGen/AArch64/arm64-vsub.ll b/test/CodeGen/AArch64/arm64-vsub.ll
index 6b44b56b7bf0..7af69118347e 100644
--- a/test/CodeGen/AArch64/arm64-vsub.ll
+++ b/test/CodeGen/AArch64/arm64-vsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: subhn8b:
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index ec49110d4052..8b212aa6c1da 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -disable-post-ra -verify-machineinstrs | FileCheck %s
 
 ;
 ; Get the actual value of the overflow bit.
diff --git a/test/CodeGen/AArch64/arm64-zeroreg.ll b/test/CodeGen/AArch64/arm64-zeroreg.ll
new file mode 100644
index 000000000000..f6e1bc3eaf44
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-zeroreg.ll
@@ -0,0 +1,91 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "aarch64--"
+
+declare void @begin()
+declare void @end()
+
+; Test that we use the zero register before regalloc and do not unnecessarily
+; clobber a register with the SUBS (cmp) instruction.
+; CHECK-LABEL: func:
+define void @func(i64* %addr) {
+  ; We should not see any spills or reloads between begin and end
+  ; CHECK: bl begin
+  ; CHECK-NOT: str{{.*}}sp
+  ; CHECK-NOT: Folded Spill
+  ; CHECK-NOT: ldr{{.*}}sp
+  ; CHECK-NOT: Folded Reload
+  call void @begin()
+  %v0 = load volatile i64, i64* %addr  
+  %v1 = load volatile i64, i64* %addr  
+  %v2 = load volatile i64, i64* %addr  
+  %v3 = load volatile i64, i64* %addr  
+  %v4 = load volatile i64, i64* %addr  
+  %v5 = load volatile i64, i64* %addr  
+  %v6 = load volatile i64, i64* %addr  
+  %v7 = load volatile i64, i64* %addr  
+  %v8 = load volatile i64, i64* %addr  
+  %v9 = load volatile i64, i64* %addr  
+  %v10 = load volatile i64, i64* %addr  
+  %v11 = load volatile i64, i64* %addr  
+  %v12 = load volatile i64, i64* %addr  
+  %v13 = load volatile i64, i64* %addr  
+  %v14 = load volatile i64, i64* %addr  
+  %v15 = load volatile i64, i64* %addr  
+  %v16 = load volatile i64, i64* %addr  
+  %v17 = load volatile i64, i64* %addr  
+  %v18 = load volatile i64, i64* %addr  
+  %v19 = load volatile i64, i64* %addr  
+  %v20 = load volatile i64, i64* %addr
+  %v21 = load volatile i64, i64* %addr
+  %v22 = load volatile i64, i64* %addr
+  %v23 = load volatile i64, i64* %addr
+  %v24 = load volatile i64, i64* %addr
+  %v25 = load volatile i64, i64* %addr
+  %v26 = load volatile i64, i64* %addr
+  %v27 = load volatile i64, i64* %addr
+  %v28 = load volatile i64, i64* %addr
+  %v29 = load volatile i64, i64* %addr
+
+  %c = icmp eq i64 %v0, %v1
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  store volatile i64 %v2, i64* %addr
+  br label %if.end
+
+if.end:
+  store volatile i64 %v0, i64* %addr
+  store volatile i64 %v1, i64* %addr
+  store volatile i64 %v2, i64* %addr
+  store volatile i64 %v3, i64* %addr
+  store volatile i64 %v4, i64* %addr
+  store volatile i64 %v5, i64* %addr
+  store volatile i64 %v6, i64* %addr
+  store volatile i64 %v7, i64* %addr
+  store volatile i64 %v8, i64* %addr
+  store volatile i64 %v9, i64* %addr
+  store volatile i64 %v10, i64* %addr
+  store volatile i64 %v11, i64* %addr
+  store volatile i64 %v12, i64* %addr
+  store volatile i64 %v13, i64* %addr
+  store volatile i64 %v14, i64* %addr
+  store volatile i64 %v15, i64* %addr
+  store volatile i64 %v16, i64* %addr
+  store volatile i64 %v17, i64* %addr
+  store volatile i64 %v18, i64* %addr
+  store volatile i64 %v19, i64* %addr
+  store volatile i64 %v20, i64* %addr
+  store volatile i64 %v21, i64* %addr
+  store volatile i64 %v22, i64* %addr
+  store volatile i64 %v23, i64* %addr
+  store volatile i64 %v24, i64* %addr
+  store volatile i64 %v25, i64* %addr
+  store volatile i64 %v26, i64* %addr
+  store volatile i64 %v27, i64* %addr
+  store volatile i64 %v28, i64* %addr
+  store volatile i64 %v29, i64* %addr
+  ; CHECK: bl end
+  call void @end()
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-zext.ll b/test/CodeGen/AArch64/arm64-zext.ll
index 8d9e5ea040ee..9470708ebdc0 100644
--- a/test/CodeGen/AArch64/arm64-zext.ll
+++ b/test/CodeGen/AArch64/arm64-zext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 define i64 @foo(i32 %a, i32 %b) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-zextload-unscaled.ll b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
index 321cf10fe45c..7a94bbf24d41 100644
--- a/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
+++ b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
 
 @var32 = global i32 0
 
diff --git a/test/CodeGen/AArch64/arm64-zip.ll b/test/CodeGen/AArch64/arm64-zip.ll
index ddce002c25db..b32123df9219 100644
--- a/test/CodeGen/AArch64/arm64-zip.ll
+++ b/test/CodeGen/AArch64/arm64-zip.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vzipi8:
diff --git a/test/CodeGen/AArch64/asm-large-immediate.ll b/test/CodeGen/AArch64/asm-large-immediate.ll
index 05e4dddc7a7f..83690716a9e2 100644
--- a/test/CodeGen/AArch64/asm-large-immediate.ll
+++ b/test/CodeGen/AArch64/asm-large-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 -no-integrated-as < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -no-integrated-as | FileCheck %s
 
 define void @test() {
 entry:
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 9fac8d8a868a..b763e065200d 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -452,20 +452,19 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32:
+; CHECK: mov {{[xw]}}8, w[[OLD:[0-9]+]]
    %old = atomicrmw xchg i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; ; CHECK: ldxr {{[xw]}}[[OLD]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w8, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
-
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
    ret i32 %old
 }
 
diff --git a/test/CodeGen/AArch64/bics.ll b/test/CodeGen/AArch64/bics.ll
new file mode 100644
index 000000000000..53aa28ad913f
--- /dev/null
+++ b/test/CodeGen/AArch64/bics.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i1 @andn_cmp(i32 %x, i32 %y) {
+; CHECK-LABEL: andn_cmp:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    bics wzr, w1, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+  %notx = xor i32 %x, -1
+  %and = and i32 %notx, %y
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @and_cmp(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    bics wzr, w1, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+  %and = and i32 %x, %y
+  %cmp = icmp eq i32 %and, %y
+  ret i1 %cmp
+}
+
+define i1 @and_cmp_const(i32 %x) {
+; CHECK-LABEL: and_cmp_const:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    mov w8, #43
+; CHECK-NEXT:    bics wzr, w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+  %and = and i32 %x, 43
+  %cmp = icmp eq i32 %and, 43
+  ret i1 %cmp
+}
+
diff --git a/test/CodeGen/AArch64/bitreverse.ll b/test/CodeGen/AArch64/bitreverse.ll
index 2eee7cfd8b97..135bce3bdb6c 100644
--- a/test/CodeGen/AArch64/bitreverse.ll
+++ b/test/CodeGen/AArch64/bitreverse.ll
@@ -15,29 +15,28 @@ define <2 x i16> @f(<2 x i16> %a) {
 
 declare i8 @llvm.bitreverse.i8(i8) readnone
 
-; Unfortunately some of the shift-and-inserts become BFIs, and some do not :(
 define i8 @g(i8 %a) {
 ; CHECK-LABEL: g:
-; CHECK-DAG: lsr [[S5:w.*]], w0, #5
-; CHECK-DAG: lsr [[S4:w.*]], w0, #4
-; CHECK-DAG: lsr [[S3:w.*]], w0, #3
-; CHECK-DAG: lsr [[S2:w.*]], w0, #2
-; CHECK-DAG: lsl [[L1:w.*]], w0, #29
-; CHECK-DAG: lsl [[L2:w.*]], w0, #19
-; CHECK-DAG: lsl [[L3:w.*]], w0, #17
+; CHECK-DAG: rev [[RV:w.*]], w0
+; CHECK-DAG: and [[L4:w.*]], [[RV]], #0xf0f0f0f
+; CHECK-DAG: and [[H4:w.*]], [[RV]], #0xf0f0f0f0
+; CHECK-DAG: lsr [[S4:w.*]], [[H4]], #4
+; CHECK-DAG: orr [[R4:w.*]], [[S4]], [[L4]], lsl #4
 
-; CHECK-DAG: and [[T1:w.*]], [[L1]], #0x40000000
-; CHECK-DAG: bfi [[T1]], w0, #31, #1
-; CHECK-DAG: bfi [[T1]], [[S2]], #29, #1
-; CHECK-DAG: bfi [[T1]], [[S3]], #28, #1
-; CHECK-DAG: bfi [[T1]], [[S4]], #27, #1
-; CHECK-DAG: bfi [[T1]], [[S5]], #26, #1
-; CHECK-DAG: and [[T2:w.*]], [[L2]], #0x2000000
-; CHECK-DAG: and [[T3:w.*]], [[L3]], #0x1000000
-; CHECK-DAG: orr [[T4:w.*]], [[T1]], [[T2]]
-; CHECK-DAG: orr [[T5:w.*]], [[T4]], [[T3]]
-; CHECK:     lsr w0, [[T5]], #24
+; CHECK-DAG: and [[L2:w.*]], [[R4]], #0x33333333
+; CHECK-DAG: and [[H2:w.*]], [[R4]], #0xcccccccc
+; CHECK-DAG: lsr [[S2:w.*]], [[H2]], #2
+; CHECK-DAG: orr [[R2:w.*]], [[S2]], [[L2]], lsl #2
 
+; CHECK-DAG: mov [[P1:w.*]], #1426063360
+; CHECK-DAG: mov [[N1:w.*]], #-1442840576
+; CHECK-DAG: and [[L1:w.*]], [[R2]], [[P1]]
+; CHECK-DAG: and [[H1:w.*]], [[R2]], [[N1]]
+; CHECK-DAG: lsr [[S1:w.*]], [[H1]], #1
+; CHECK-DAG: orr [[R1:w.*]], [[S1]], [[L1]], lsl #1
+
+; CHECK-DAG: lsr w0, [[R1]], #24
+; CHECK-DAG: ret
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
 }
@@ -45,44 +44,33 @@ define i8 @g(i8 %a) {
 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone
 
 define <8 x i8> @g_vec(<8 x i8> %a) {
-; Try and match as much of the sequence as precisely as possible.
+; The reversal is matched as three mask/shift/orr stages: nibbles, bit pairs, bits.
+; CHECK-LABEL: g_vec:
+; CHECK-DAG: movi [[M1:v.*]], #15
+; CHECK-DAG: movi [[M2:v.*]], #240
+; CHECK:     and  [[A1:v.*]], v0.8b, [[M1]]
+; CHECK:     and  [[A2:v.*]], v0.8b, [[M2]]
+; CHECK-DAG: shl  [[L4:v.*]], [[A1]], #4
+; CHECK-DAG: ushr [[R4:v.*]], [[A2]], #4
+; CHECK-DAG: orr  [[V4:v.*]], [[R4]], [[L4]]
+
+; CHECK-DAG: movi [[M3:v.*]], #51
+; CHECK-DAG: movi [[M4:v.*]], #204
+; CHECK:     and  [[A3:v.*]], [[V4]], [[M3]]
+; CHECK:     and  [[A4:v.*]], [[V4]], [[M4]]
+; CHECK-DAG: shl  [[L2:v.*]], [[A3]], #2
+; CHECK-DAG: ushr [[R2:v.*]], [[A4]], #2
+; CHECK-DAG: orr  [[V2:v.*]], [[R2]], [[L2]]
 
-; CHECK-LABEL: g_vec:
-; CHECK-DAG: movi [[M1:v.*]], #128
-; CHECK-DAG: movi [[M2:v.*]], #64
-; CHECK-DAG: movi [[M3:v.*]], #32
-; CHECK-DAG: movi [[M4:v.*]], #16
-; CHECK-DAG: movi [[M5:v.*]], #8{{$}}
-; CHECK-DAG: movi [[M6:v.*]], #4{{$}}
-; CHECK-DAG: movi [[M7:v.*]], #2{{$}}
-; CHECK-DAG: movi [[M8:v.*]], #1{{$}}
-; CHECK-DAG: shl  [[S1:v.*]], v0.8b, #7
-; CHECK-DAG: shl  [[S2:v.*]], v0.8b, #5
-; CHECK-DAG: shl  [[S3:v.*]], v0.8b, #3
-; CHECK-DAG: shl  [[S4:v.*]], v0.8b, #1
-; CHECK-DAG: ushr [[S5:v.*]], v0.8b, #1
-; CHECK-DAG: ushr [[S6:v.*]], v0.8b, #3
-; CHECK-DAG: ushr [[S7:v.*]], v0.8b, #5
-; CHECK-DAG: ushr [[S8:v.*]], v0.8b, #7
-; CHECK-DAG: and  [[A1:v.*]], [[S1]], [[M1]]
-; CHECK-DAG: and  [[A2:v.*]], [[S2]], [[M2]]
-; CHECK-DAG: and  [[A3:v.*]], [[S3]], [[M3]]
-; CHECK-DAG: and  [[A4:v.*]], [[S4]], [[M4]]
-; CHECK-DAG: and  [[A5:v.*]], [[S5]], [[M5]]
-; CHECK-DAG: and  [[A6:v.*]], [[S6]], [[M6]]
-; CHECK-DAG: and  [[A7:v.*]], [[S7]], [[M7]]
-; CHECK-DAG: and  [[A8:v.*]], [[S8]], [[M8]]
+; CHECK-DAG: movi [[M5:v.*]], #85
+; CHECK-DAG: movi [[M6:v.*]], #170
+; CHECK:     and  [[A5:v.*]], [[V2]], [[M5]]
+; CHECK:     and  [[A6:v.*]], [[V2]], [[M6]]
+; CHECK-DAG: shl  [[L1:v.*]], [[A5]], #1
+; CHECK-DAG: ushr [[R1:v.*]], [[A6]], #1
+; CHECK:     orr  [[V1:v.*]], [[R1]], [[L1]]
 
-; The rest can be ORRed together in any order; it's not worth the test
-; maintenance to match them precisely.
-; CHECK-DAG: orr
-; CHECK-DAG: orr
-; CHECK-DAG: orr
-; CHECK-DAG: orr
-; CHECK-DAG: orr
-; CHECK-DAG: orr
-; CHECK-DAG: orr
-; CHECK: ret
+; CHECK:     ret
   %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
   ret <8 x i8> %b
 }
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index e93c69fd3ea3..7c0755a13d0e 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 @addr = global i8* null
 
diff --git a/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
index e3af90ae4831..3ecb1d49ee1c 100644
--- a/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
+++ b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu -stop-after branch-folder -o - < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -stop-after branch-folder | FileCheck %s
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
 ; Function Attrs: norecurse nounwind
 define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 {
-; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}})
+; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}})
 entry:
   %cmp = icmp sgt i32 %a, 0
   br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/AArch64/branch-relax-alignment.ll b/test/CodeGen/AArch64/branch-relax-alignment.ll
new file mode 100644
index 000000000000..7135dff7f573
--- /dev/null
+++ b/test/CodeGen/AArch64/branch-relax-alignment.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-bcc-offset-bits=4 -align-all-nofallthru-blocks=4 < %s | FileCheck %s
+
+; Long branch is assumed because the block has a higher alignment
+; requirement than the function.
+
+; CHECK-LABEL: invert_bcc_block_align_higher_func:
+; CHECK: b.eq [[JUMP_BB1:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b [[JUMP_BB2:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[JUMP_BB1]]:
+; CHECK: ret
+; CHECK: .p2align 4
+
+; CHECK: [[JUMP_BB2]]:
+; CHECK: ret
+define i32 @invert_bcc_block_align_higher_func(i32 %x, i32 %y) align 4 #0 {
+  %1 = icmp eq i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+
+bb2:
+  store volatile i32 9, i32* undef
+  ret i32 1
+
+bb1:
+  store volatile i32 42, i32* undef
+  ret i32 0
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/branch-relax-bcc.ll b/test/CodeGen/AArch64/branch-relax-bcc.ll
new file mode 100644
index 000000000000..636acf0a8b82
--- /dev/null
+++ b/test/CodeGen/AArch64/branch-relax-bcc.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-bcc-offset-bits=3 < %s | FileCheck %s
+
+; CHECK-LABEL: invert_bcc:
+; CHECK:      fcmp s0, s1
+; CHECK-NEXT: b.eq [[JUMP_BB1:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b [[JUMP_BB2:LBB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: [[JUMP_BB1]]:
+; CHECK-NEXT: b [[BB1:LBB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: [[JUMP_BB2]]:
+; CHECK-NEXT: b.vc [[BB2:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b [[BB1]]
+
+; CHECK: [[BB2]]: ; %bb2
+; CHECK: mov w{{[0-9]+}}, #9
+; CHECK: ret
+
+; CHECK: [[BB1]]: ; %bb1
+; CHECK: mov w{{[0-9]+}}, #42
+; CHECK: ret
+
+define i32 @invert_bcc(float %x, float %y) #0 {
+  %1 = fcmp ueq float %x, %y
+  br i1 %1, label %bb1, label %bb2
+
+bb2:
+  call void asm sideeffect
+    "nop
+     nop",
+    ""() #0
+  store volatile i32 9, i32* undef
+  ret i32 1
+
+bb1:
+  store volatile i32 42, i32* undef
+  ret i32 0
+}
+
+declare i32 @foo() #0
+
+; CHECK-LABEL: _block_split:
+; CHECK: cmp w0, #5
+; CHECK-NEXT: b.eq [[LONG_BR_BB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b [[LOR_LHS_FALSE_BB:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[LONG_BR_BB]]:
+; CHECK-NEXT: b [[IF_THEN_BB:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[LOR_LHS_FALSE_BB]]:
+; CHECK: cmp w{{[0-9]+}}, #16
+; CHECK-NEXT: b.le [[IF_THEN_BB]]
+; CHECK-NEXT: b [[IF_END_BB:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[IF_THEN_BB]]:
+; CHECK: bl _foo
+; CHECK-NOT: b L
+
+; CHECK: [[IF_END_BB]]:
+; CHECK: #0x7
+; CHECK: ret
+define i32 @block_split(i32 %a, i32 %b) #0 {
+entry:
+  %cmp = icmp eq i32 %a, 5
+  br i1 %cmp, label %if.then, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp1 = icmp slt i32 %b, 7
+  %mul = shl nsw i32 %b, 1
+  %add = add nsw i32 %b, 1
+  %cond = select i1 %cmp1, i32 %mul, i32 %add
+  %cmp2 = icmp slt i32 %cond, 17
+  br i1 %cmp2, label %if.then, label %if.end
+
+if.then:                                          ; preds = %lor.lhs.false, %entry
+  %call = tail call i32 @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %lor.lhs.false
+  ret i32 7
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/branch-relax-cbz.ll b/test/CodeGen/AArch64/branch-relax-cbz.ll
new file mode 100644
index 000000000000..c654b94e49cf
--- /dev/null
+++ b/test/CodeGen/AArch64/branch-relax-cbz.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-cbz-offset-bits=3 < %s | FileCheck %s
+
+; CHECK-LABEL: _split_block_no_fallthrough:
+; CHECK: cmn x{{[0-9]+}}, #5
+; CHECK-NEXT: b.le [[B2:LBB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: ; BB#1: ; %b3
+; CHECK: ldr [[LOAD:w[0-9]+]]
+; CHECK: cbz [[LOAD]], [[SKIP_LONG_B:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b [[B8:LBB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: [[SKIP_LONG_B]]:
+; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: [[B2]]: ; %b2
+; CHECK: mov w{{[0-9]+}}, #93
+; CHECK: bl _extfunc
+; CHECK: cbz w{{[0-9]+}}, [[B7]]
+
+; CHECK-NEXT: [[B8]]: ; %b8
+; CHECK-NEXT: ret
+
+; CHECK-NEXT: [[B7]]: ; %b7
+; CHECK: mov w{{[0-9]+}}, #13
+; CHECK: b _extfunc
+define void @split_block_no_fallthrough(i64 %val) #0 {
+bb:
+  %c0 = icmp sgt i64 %val, -5
+  br i1 %c0, label %b3, label %b2
+
+b2:
+  %v0 = tail call i32 @extfunc(i32 93)
+  %c1 = icmp eq i32 %v0, 0
+  br i1 %c1, label %b7, label %b8
+
+b3:
+  %v1 = load volatile i32, i32* undef, align 4
+  %c2 = icmp eq i32 %v1, 0
+  br i1 %c2, label %b7, label %b8
+
+b7:
+  %tmp1 = tail call i32 @extfunc(i32 13)
+  ret void
+
+b8:
+  ret void
+}
+
+declare i32 @extfunc(i32) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
index 42061a851db2..311abcacd74a 100644
--- a/test/CodeGen/AArch64/breg.ll
+++ b/test/CodeGen/AArch64/breg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 
 @stored_label = global i8* null
 
diff --git a/test/CodeGen/AArch64/cmp-const-max.ll b/test/CodeGen/AArch64/cmp-const-max.ll
index 0431e391a30b..0d5846f06793 100644
--- a/test/CodeGen/AArch64/cmp-const-max.ll
+++ b/test/CodeGen/AArch64/cmp-const-max.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -aarch64-atomic-cfg-tidy=0 < %s -mtriple=aarch64-none-eabihf -fast-isel=false | FileCheck %s
+; RUN: llc -verify-machineinstrs -aarch64-enable-atomic-cfg-tidy=0 < %s -mtriple=aarch64-none-eabihf -fast-isel=false | FileCheck %s
 
 
 define i32 @ule_64_max(i64 %p) {
diff --git a/test/CodeGen/AArch64/cmpwithshort.ll b/test/CodeGen/AArch64/cmpwithshort.ll
index 65909974af73..8a94689adc94 100644
--- a/test/CodeGen/AArch64/cmpwithshort.ll
+++ b/test/CodeGen/AArch64/cmpwithshort.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s 
+; RUN: llc < %s -O3 -mtriple=aarch64-eabi | FileCheck %s 
 
 define i16 @test_1cmp_signed_1(i16* %ptr1) {
 ; CHECK-LABLE: @test_1cmp_signed_1
diff --git a/test/CodeGen/AArch64/cmpxchg-O0.ll b/test/CodeGen/AArch64/cmpxchg-O0.ll
index aed1aa493a8f..8432b15ea523 100644
--- a/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 %s -o - | FileCheck %s
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_8:
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 1f8e0efa0675..86be3ccea1d8 100644
--- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 ; marked as external to prevent possible optimizations
 @a = external global i32
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
index 4e0f69d195c2..506314451224 100644
--- a/test/CodeGen/AArch64/compare-branch.ll
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -8,25 +8,25 @@ define void @foo() {
 
   %val1 = load volatile i32, i32* @var32
   %tst1 = icmp eq i32 %val1, 0
-  br i1 %tst1, label %end, label %test2
+  br i1 %tst1, label %end, label %test2, !prof !1
 ; CHECK: cbz {{w[0-9]+}}, .LBB
 
 test2:
   %val2 = load volatile i32, i32* @var32
   %tst2 = icmp ne i32 %val2, 0
-  br i1 %tst2, label %end, label %test3
+  br i1 %tst2, label %end, label %test3, !prof !1
 ; CHECK: cbnz {{w[0-9]+}}, .LBB
 
 test3:
   %val3 = load volatile i64, i64* @var64
   %tst3 = icmp eq i64 %val3, 0
-  br i1 %tst3, label %end, label %test4
+  br i1 %tst3, label %end, label %test4, !prof !1
 ; CHECK: cbz {{x[0-9]+}}, .LBB
 
 test4:
   %val4 = load volatile i64, i64* @var64
   %tst4 = icmp ne i64 %val4, 0
-  br i1 %tst4, label %end, label %test5
+  br i1 %tst4, label %end, label %test5, !prof !1
 ; CHECK: cbnz {{x[0-9]+}}, .LBB
 
 test5:
@@ -36,3 +36,6 @@ test5:
 end:
   ret void
 }
+
+
+!1 = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/CodeGen/AArch64/complex-fp-to-int.ll b/test/CodeGen/AArch64/complex-fp-to-int.ll
index 13cf762c3d2e..6024e70789a3 100644
--- a/test/CodeGen/AArch64/complex-fp-to-int.ll
+++ b/test/CodeGen/AArch64/complex-fp-to-int.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <2 x i64> @test_v2f32_to_signed_v2i64(<2 x float> %in) {
 ; CHECK-LABEL: test_v2f32_to_signed_v2i64:
diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll
index 227c626ba15d..e37e508ca2bf 100644
--- a/test/CodeGen/AArch64/complex-int-to-fp.ll
+++ b/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 ; CHECK: autogen_SD19655
 ; CHECK: scvtf
diff --git a/test/CodeGen/AArch64/cond-sel-value-prop.ll b/test/CodeGen/AArch64/cond-sel-value-prop.ll
new file mode 100644
index 000000000000..dd87afce4b00
--- /dev/null
+++ b/test/CodeGen/AArch64/cond-sel-value-prop.ll
@@ -0,0 +1,110 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; Transform "a == C ? C : x" to "a == C ? a : x" to avoid materializing C.
+; CHECK-LABEL: test1:
+; CHECK: cmp w[[REG1:[0-9]+]], #2
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x7
+; CHECK: csel w0, w[[REG1]], w[[REG2]], eq
+define i32 @test1(i32 %x) {
+  %cmp = icmp eq i32 %x, 2
+  %res = select i1 %cmp, i32 2, i32 7
+  ret i32 %res
+}
+
+; Transform "a == C ? C : x" to "a == C ? a : x" to avoid materializing C.
+; CHECK-LABEL: test2:
+; CHECK: cmp x[[REG1:[0-9]+]], #2
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x7
+; CHECK: csel x0, x[[REG1]], x[[REG2]], eq
+define i64 @test2(i64 %x) {
+  %cmp = icmp eq i64 %x, 2
+  %res = select i1 %cmp, i64 2, i64 7
+  ret i64 %res
+}
+
+; Transform "a != C ? x : C" to "a != C ? x : a" to avoid materializing C.
+; CHECK-LABEL: test3:
+; CHECK: cmp x[[REG1:[0-9]+]], #7
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x2
+; CHECK: csel x0, x[[REG2]], x[[REG1]], ne
+define i64 @test3(i64 %x) {
+  %cmp = icmp ne i64 %x, 7
+  %res = select i1 %cmp, i64 2, i64 7
+  ret i64 %res
+}
+
+; Don't transform "a == C ? C : x" to "a == C ? a : x" if C == 0.  If we did, we
+; would needlessly extend the live range of x0 when we can just use xzr.
+; CHECK-LABEL: test4:
+; CHECK: cmp x0, #0
+; CHECK: orr w8, wzr, #0x7
+; CHECK: csel x0, xzr, x8, eq
+define i64 @test4(i64 %x) {
+  %cmp = icmp eq i64 %x, 0
+  %res = select i1 %cmp, i64 0, i64 7
+  ret i64 %res
+}
+
+; Don't transform "a == C ? C : x" to "a == C ? a : x" if C == 1.  If we did, we
+; would needlessly extend the live range of x0 when we can just use xzr with
+; CSINC to materialize the 1.
+; CHECK-LABEL: test5:
+; CHECK: cmp x0, #1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x7
+; CHECK: csinc x0, x[[REG]], xzr, ne
+define i64 @test5(i64 %x) {
+  %cmp = icmp eq i64 %x, 1
+  %res = select i1 %cmp, i64 1, i64 7
+  ret i64 %res
+}
+
+; Don't transform "a == C ? C : x" to "a == C ? a : x" if C == -1.  If we did, we
+; would needlessly extend the live range of x0 when we can just use xzr with
+; CSINV to materialize the -1.
+; CHECK-LABEL: test6:
+; CHECK: cmn x0, #1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x7
+; CHECK: csinv x0, x[[REG]], xzr, ne
+define i64 @test6(i64 %x) {
+  %cmp = icmp eq i64 %x, -1
+  %res = select i1 %cmp, i64 -1, i64 7
+  ret i64 %res
+}
+
+; CHECK-LABEL: test7:
+; CHECK: cmp x[[REG:[0-9]]], #7
+; CHECK: csinc x0, x[[REG]], xzr, eq
+define i64 @test7(i64 %x) {
+  %cmp = icmp eq i64 %x, 7
+  %res = select i1 %cmp, i64 7, i64 1
+  ret i64 %res
+}
+
+; CHECK-LABEL: test8:
+; CHECK: cmp x[[REG:[0-9]]], #7
+; CHECK: csinc x0, x[[REG]], xzr, eq
+define i64 @test8(i64 %x) {
+  %cmp = icmp ne i64 %x, 7
+  %res = select i1 %cmp, i64 1, i64 7
+  ret i64 %res
+}
+
+; CHECK-LABEL: test9:
+; CHECK: cmp x[[REG:[0-9]]], #7
+; CHECK: csinv x0, x[[REG]], xzr, eq
+define i64 @test9(i64 %x) {
+  %cmp = icmp eq i64 %x, 7
+  %res = select i1 %cmp, i64 7, i64 -1
+  ret i64 %res
+}
+
+; Rather than use a CNEG, use a CSINV to transform "a == 1 ? 1 : -1" to
+; "a == 1 ? a : -1" to avoid materializing a constant.
+; CHECK-LABEL: test10:
+; CHECK: cmp w[[REG:[0-9]]], #1
+; CHECK: csinv w0, w[[REG]], wzr, eq
+define i32 @test10(i32 %x) {
+  %cmp = icmp eq i32 %x, 1
+  %res = select i1 %cmp, i32 1, i32 -1
+  ret i32 %res
+}
diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll
index 3296e38b64f4..50685cf5d343 100644
--- a/test/CodeGen/AArch64/cpus.ll
+++ b/test/CodeGen/AArch64/cpus.ll
@@ -8,6 +8,9 @@
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a73 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m1 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m2 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m3 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=falkor 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=vulcan 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
diff --git a/test/CodeGen/AArch64/csel-zero-float.ll b/test/CodeGen/AArch64/csel-zero-float.ll
new file mode 100644
index 000000000000..9869c651f56f
--- /dev/null
+++ b/test/CodeGen/AArch64/csel-zero-float.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-unsafe-fp-math < %s
+; FileCheck is deliberately not invoked: this input used to crash llc in
+; "Post-RA pseudo instruction expansion", so the test only checks llc succeeds.
+
+define double @foo(float *%user, float %t17) {
+  %t16 = load float, float* %user, align 8
+  %conv = fpext float %t16 to double
+  %cmp26 = fcmp fast oeq float %t17, 0.000000e+00
+  %div = fdiv fast float %t16, %t17
+  %div.op = fmul fast float %div, 1.000000e+02
+  %t18 = fpext float %div.op to double
+  %conv31 = select i1 %cmp26, double 0.000000e+00, double %t18
+  ret double %conv31
+}
+
diff --git a/test/CodeGen/AArch64/dag-combine-mul-shl.ll b/test/CodeGen/AArch64/dag-combine-mul-shl.ll
new file mode 100644
index 000000000000..00c500594063
--- /dev/null
+++ b/test/CodeGen/AArch64/dag-combine-mul-shl.ll
@@ -0,0 +1,117 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; CHECK-LABEL: fn1_vector:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn1_vector(<16 x i8> %arg) {
+entry:
+  %shl = shl <16 x i8> %arg, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  %mul = mul <16 x i8> %shl, <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  ret <16 x i8> %mul
+}
+
+; CHECK-LABEL: fn2_vector:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn2_vector(<16 x i8> %arg) {
+entry:
+  %mul = mul <16 x i8> %arg, <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  %shl = shl <16 x i8> %mul, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %shl
+}
+
+; CHECK-LABEL: fn1_vector_undef:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn1_vector_undef(<16 x i8> %arg) {
+entry:
+  %shl = shl <16 x i8> %arg, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  %mul = mul <16 x i8> %shl, <i8 undef, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  ret <16 x i8> %mul
+}
+
+; CHECK-LABEL: fn2_vector_undef:
+; CHECK:      adrp x[[BASE:[0-9]+]], .LCP
+; CHECK-NEXT: ldr q[[NUM:[0-9]+]], [x[[BASE]],
+; CHECK-NEXT: mul v0.16b, v0.16b, v[[NUM]].16b
+; CHECK-NEXT: ret
+define <16 x i8> @fn2_vector_undef(<16 x i8> %arg) {
+entry:
+  %mul = mul <16 x i8> %arg, <i8 undef, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  %shl = shl <16 x i8> %mul, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %shl
+}
+
+; CHECK-LABEL: fn1_scalar:
+; CHECK:      mov w[[REG:[0-9]+]], #1664
+; CHECK-NEXT: mul w0, w0, w[[REG]]
+; CHECK-NEXT: ret
+define i32 @fn1_scalar(i32 %arg) {
+entry:
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, 13
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar:
+; CHECK:      mov w[[REG:[0-9]+]], #1664
+; CHECK-NEXT: mul w0, w0, w[[REG]]
+; CHECK-NEXT: ret
+define i32 @fn2_scalar(i32 %arg) {
+entry:
+  %mul = mul i32 %arg, 13
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
+
+; CHECK-LABEL: fn1_scalar_undef:
+; CHECK:      mov w0
+; CHECK-NEXT: ret
+define i32 @fn1_scalar_undef(i32 %arg) {
+entry:
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, undef
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar_undef:
+; CHECK:      mov w0
+; CHECK-NEXT: ret
+define i32 @fn2_scalar_undef(i32 %arg) {
+entry:
+  %mul = mul i32 %arg, undef
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
+
+; CHECK-LABEL: fn1_scalar_opaque:
+; CHECK:      mov w[[REG:[0-9]+]], #13
+; CHECK-NEXT: mul w[[REG]], w0, w[[REG]]
+; CHECK-NEXT: lsl w0, w[[REG]], #7
+; CHECK-NEXT: ret
+define i32 @fn1_scalar_opaque(i32 %arg) {
+entry:
+  %bitcast = bitcast i32 13 to i32
+  %shl = shl i32 %arg, 7
+  %mul = mul i32 %shl, %bitcast
+  ret i32 %mul
+}
+
+; CHECK-LABEL: fn2_scalar_opaque:
+; CHECK:      mov w[[REG:[0-9]+]], #13
+; CHECK-NEXT: mul w[[REG]], w0, w[[REG]]
+; CHECK-NEXT: lsl w0, w[[REG]], #7
+; CHECK-NEXT: ret
+define i32 @fn2_scalar_opaque(i32 %arg) {
+entry:
+  %bitcast = bitcast i32 13 to i32
+  %mul = mul i32 %arg, %bitcast
+  %shl = shl i32 %mul, 7
+  ret i32 %shl
+}
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
index f89d7603fd3e..4cba339ee4a7 100644
--- a/test/CodeGen/AArch64/directcond.ll
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s
 
 define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
 ; CHECK-LABEL: test_select_i32:
diff --git a/test/CodeGen/AArch64/div_minsize.ll b/test/CodeGen/AArch64/div_minsize.ll
index 43f12340f19f..f62ef4ee4a2d 100644
--- a/test/CodeGen/AArch64/div_minsize.ll
+++ b/test/CodeGen/AArch64/div_minsize.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 define i32 @testsize1(i32 %x) minsize nounwind {
 entry:
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
index f50504a9a260..613c71a558bd 100644
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -185,9 +185,8 @@ define i1 @test_fcmp_une(half %a, half %b) #0 {
 ; CHECK-NEXT: fcvt s1, h1
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: orr [[TRUE:w[0-9]+]], wzr, #0x1
-; CHECK-NEXT: csel [[CC:w[0-9]+]], [[TRUE]], wzr, eq
-; CHECK-NEXT: csel w0, [[TRUE]], [[CC]], vs
+; CHECK-NEXT: cset [[TRUE:w[0-9]+]], eq
+; CHECK-NEXT: csinc w0, [[TRUE]], wzr, vc
 ; CHECK-NEXT: ret
 define i1 @test_fcmp_ueq(half %a, half %b) #0 {
   %r = fcmp ueq half %a, %b
@@ -254,9 +253,8 @@ define i1 @test_fcmp_uno(half %a, half %b) #0 {
 ; CHECK-NEXT: fcvt s1, h1
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: orr [[TRUE:w[0-9]+]], wzr, #0x1
-; CHECK-NEXT: csel [[CC:w[0-9]+]], [[TRUE]], wzr, mi
-; CHECK-NEXT: csel w0, [[TRUE]], [[CC]], gt
+; CHECK-NEXT: cset [[TRUE:w[0-9]+]], mi
+; CHECK-NEXT: csinc w0, [[TRUE]], wzr, le
 ; CHECK-NEXT: ret
 define i1 @test_fcmp_one(half %a, half %b) #0 {
   %r = fcmp one half %a, %b
diff --git a/test/CodeGen/AArch64/fast-isel-assume.ll b/test/CodeGen/AArch64/fast-isel-assume.ll
new file mode 100644
index 000000000000..d39a907407db
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-assume.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-- -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
+
+; Check that we ignore the assume intrinsic.
+
+; CHECK-LABEL: test:
+; CHECK: // BB#0:
+; CHECK-NEXT: ret
+define void @test(i32 %a) {
+  %tmp0 = icmp slt i32 %a, 0
+  call void @llvm.assume(i1 %tmp0)
+  ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/CodeGen/AArch64/fast-isel-atomic.ll b/test/CodeGen/AArch64/fast-isel-atomic.ll
new file mode 100644
index 000000000000..195b8befc8e1
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-atomic.ll
@@ -0,0 +1,244 @@
+; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-- -O0 -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
+
+; Checking the SelectionDAG output isn't strictly necessary, but it currently
+; matches FastISel's, so we might as well check both!  Feel free to remove SDAG.
+
+; CHECK-LABEL: atomic_store_monotonic_8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strb  w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_8(i8* %p, i8 %val) #0 {
+  store atomic i8 %val, i8* %p monotonic, align 1
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_8_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strb w1, [x0, #1]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_8_off(i8* %p, i8 %val) #0 {
+  %tmp0 = getelementptr i8, i8* %p, i32 1
+  store atomic i8 %val, i8* %tmp0 monotonic, align 1
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strh  w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_16(i16* %p, i16 %val) #0 {
+  store atomic i16 %val, i16* %p monotonic, align 2
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_16_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strh w1, [x0, #2]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_16_off(i16* %p, i16 %val) #0 {
+  %tmp0 = getelementptr i16, i16* %p, i32 1
+  store atomic i16 %val, i16* %tmp0 monotonic, align 2
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str  w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_32(i32* %p, i32 %val) #0 {
+  store atomic i32 %val, i32* %p monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_32_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str w1, [x0, #4]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_32_off(i32* %p, i32 %val) #0 {
+  %tmp0 = getelementptr i32, i32* %p, i32 1
+  store atomic i32 %val, i32* %tmp0 monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str  x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_64(i64* %p, i64 %val) #0 {
+  store atomic i64 %val, i64* %p monotonic, align 8
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_monotonic_64_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str x1, [x0, #8]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_64_off(i64* %p, i64 %val) #0 {
+  %tmp0 = getelementptr i64, i64* %p, i32 1
+  store atomic i64 %val, i64* %tmp0 monotonic, align 8
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_8(i8* %p, i8 %val) #0 {
+  store atomic i8 %val, i8* %p release, align 1
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_8_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #1
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_8_off(i8* %p, i8 %val) #0 {
+  %tmp0 = getelementptr i8, i8* %p, i32 1
+  store atomic i8 %val, i8* %tmp0 release, align 1
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_16(i16* %p, i16 %val) #0 {
+  store atomic i16 %val, i16* %p release, align 2
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_16_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #2
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_16_off(i16* %p, i16 %val) #0 {
+  %tmp0 = getelementptr i16, i16* %p, i32 1
+  store atomic i16 %val, i16* %tmp0 release, align 2
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_32(i32* %p, i32 %val) #0 {
+  store atomic i32 %val, i32* %p release, align 4
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_32_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #4
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_32_off(i32* %p, i32 %val) #0 {
+  %tmp0 = getelementptr i32, i32* %p, i32 1
+  store atomic i32 %val, i32* %tmp0 release, align 4
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_64(i64* %p, i64 %val) #0 {
+  store atomic i64 %val, i64* %p release, align 8
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_release_64_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #8
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_64_off(i64* %p, i64 %val) #0 {
+  %tmp0 = getelementptr i64, i64* %p, i32 1
+  store atomic i64 %val, i64* %tmp0 release, align 8
+  ret void
+}
+
+
+; CHECK-LABEL: atomic_store_seq_cst_8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_8(i8* %p, i8 %val) #0 {
+  store atomic i8 %val, i8* %p seq_cst, align 1
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_8_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #1
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_8_off(i8* %p, i8 %val) #0 {
+  %tmp0 = getelementptr i8, i8* %p, i32 1
+  store atomic i8 %val, i8* %tmp0 seq_cst, align 1
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_16(i16* %p, i16 %val) #0 {
+  store atomic i16 %val, i16* %p seq_cst, align 2
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_16_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #2
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_16_off(i16* %p, i16 %val) #0 {
+  %tmp0 = getelementptr i16, i16* %p, i32 1
+  store atomic i16 %val, i16* %tmp0 seq_cst, align 2
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_32(i32* %p, i32 %val) #0 {
+  store atomic i32 %val, i32* %p seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_32_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #4
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_32_off(i32* %p, i32 %val) #0 {
+  %tmp0 = getelementptr i32, i32* %p, i32 1
+  store atomic i32 %val, i32* %tmp0 seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_64(i64* %p, i64 %val) #0 {
+  store atomic i64 %val, i64* %p seq_cst, align 8
+  ret void
+}
+
+; CHECK-LABEL: atomic_store_seq_cst_64_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #8
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_64_off(i64* %p, i64 %val) #0 {
+  %tmp0 = getelementptr i64, i64* %p, i32 1
+  store atomic i64 %val, i64* %tmp0 seq_cst, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fast-isel-branch_weights.ll b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
index ff57bbb33c48..c749e4d4041b 100644
--- a/test/CodeGen/AArch64/fast-isel-branch_weights.ll
+++ b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0                             -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -aarch64-enable-atomic-cfg-tidy=0                               -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -aarch64-enable-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
 
 ; Test if the BBs are reordred according to their branch weights.
 define i64 @branch_weights_test(i64 %a, i64 %b) {
diff --git a/test/CodeGen/AArch64/fast-isel-cbz.ll b/test/CodeGen/AArch64/fast-isel-cbz.ll
index a407b269dd82..45cc678a0a15 100644
--- a/test/CodeGen/AArch64/fast-isel-cbz.ll
+++ b/test/CodeGen/AArch64/fast-isel-cbz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
 
 define i32 @icmp_eq_i1(i1 %a) {
 ; CHECK-LABEL: icmp_eq_i1
diff --git a/test/CodeGen/AArch64/fast-isel-cmp-branch.ll b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
index 1ac358f37aa8..ce47bc42453c 100644
--- a/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
+++ b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
@@ -1,5 +1,5 @@
-; RUN: llc                             -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
-; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc                               -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
 
 define i32 @fcmp_oeq(float %x, float %y) {
 ; CHECK-LABEL: fcmp_oeq
diff --git a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
index 2a0139ed9b08..89b368fa19bb 100644
--- a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
+++ b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs \
-; RUN:   -aarch64-atomic-cfg-tidy=0 -disable-cgp -disable-branch-fold \
+; RUN:   -aarch64-enable-atomic-cfg-tidy=0 -disable-cgp -disable-branch-fold \
 ; RUN:   < %s | FileCheck %s
 
 ;
diff --git a/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
new file mode 100644
index 000000000000..aa78210fae74
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: cmpxchg_monotonic_32:
+; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT:     ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK-NEXT:     cmp [[OLD]], w1
+; CHECK-NEXT:     b.ne [[DONE:.LBB[0-9_]+]]
+; CHECK-NEXT: // BB#2:
+; CHECK-NEXT:     stlxr [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK-NEXT:     cbnz [[STATUS]], [[RETRY]]
+; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT:     cmp [[OLD]], w1
+; CHECK-NEXT:     cset [[STATUS:w[0-9]+]], eq
+; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
+; CHECK-NEXT:     str [[STATUS32]], [x3]
+; CHECK-NEXT:     mov w0, [[OLD]]
+define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
+  %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, i32* %ps
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: cmpxchg_acq_rel_32_load:
+; CHECK:      // BB#0:
+; CHECK:     ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT:     ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK-NEXT:     cmp [[OLD]], w1
+; CHECK-NEXT:     b.ne [[DONE:.LBB[0-9_]+]]
+; CHECK-NEXT: // BB#2:
+; CHECK-NEXT:     stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NEXT:     cbnz [[STATUS]], [[RETRY]]
+; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT:     cmp [[OLD]], w1
+; CHECK-NEXT:     cset [[STATUS:w[0-9]+]], eq
+; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
+; CHECK-NEXT:     str [[STATUS32]], [x3]
+; CHECK-NEXT:     mov w0, [[OLD]]
+define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
+  %new = load i32, i32* %pnew
+  %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, i32* %ps
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: cmpxchg_seq_cst_64:
+; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT:     ldaxr [[OLD:x[0-9]+]], [x0]
+; CHECK-NEXT:     cmp [[OLD]], x1
+; CHECK-NEXT:     b.ne [[DONE:.LBB[0-9_]+]]
+; CHECK-NEXT: // BB#2:
+; CHECK-NEXT:     stlxr [[STATUS:w[0-9]+]], x2, [x0]
+; CHECK-NEXT:     cbnz [[STATUS]], [[RETRY]]
+; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT:     cmp [[OLD]], x1
+; CHECK-NEXT:     cset [[STATUS:w[0-9]+]], eq
+; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
+; CHECK-NEXT:     str [[STATUS32]], [x3]
+; CHECK-NEXT:     mov x0, [[OLD]]
+define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
+  %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
+  %tmp1 = extractvalue { i64, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i64, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, i32* %ps
+  ret i64 %tmp1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext2.ll b/test/CodeGen/AArch64/fast-isel-int-ext2.ll
index 93741d6c12d6..b974f412d849 100644
--- a/test/CodeGen/AArch64/fast-isel-int-ext2.ll
+++ b/test/CodeGen/AArch64/fast-isel-int-ext2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=false -disable-cgp-branch-opts -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=false -disable-cgp-branch-opts -verify-machineinstrs < %s | FileCheck %s
 
 ;
 ; Test folding of the sign-/zero-extend into the load instruction.
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
index c35ae4230dd4..af817777143d 100644
--- a/test/CodeGen/AArch64/fast-isel-tbz.ll
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -1,5 +1,5 @@
-; RUN: llc -disable-peephole -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
-; RUN: llc -disable-peephole -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
+; RUN: llc -disable-peephole -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -disable-peephole -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
 
 define i32 @icmp_eq_i8(i8 zeroext %a) {
 ; CHECK-LABEL: icmp_eq_i8
diff --git a/test/CodeGen/AArch64/fcsel-zero.ll b/test/CodeGen/AArch64/fcsel-zero.ll
new file mode 100644
index 000000000000..3fbcd106d08a
--- /dev/null
+++ b/test/CodeGen/AArch64/fcsel-zero.ll
@@ -0,0 +1,82 @@
+; Check that 0.0 is not materialized for CSEL when comparing against it.
+
+; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s
+
+define float @foeq(float %a, float %b) #0 {
+  %t = fcmp oeq float %a, 0.0
+  %v = select i1 %t, float 0.0, float %b
+  ret float %v
+; CHECK-LABEL: foeq
+; CHECK: fcmp [[R:s[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq
+}
+
+define float @fueq(float %a, float %b) #0 {
+  %t = fcmp ueq float %a, 0.0
+  %v = select i1 %t, float 0.0, float %b
+  ret float %v
+; CHECK-LABEL: fueq
+; CHECK: fcmp [[R:s[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq
+; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs
+}
+
+define float @fone(float %a, float %b) #0 {
+  %t = fcmp one float %a, 0.0
+  %v = select i1 %t, float %b, float 0.0
+  ret float %v
+; CHECK-LABEL: fone
+; CHECK: fcmp [[R:s[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], mi
+; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt
+}
+
+define float @fune(float %a, float %b) #0 {
+  %t = fcmp une float %a, 0.0
+  %v = select i1 %t, float %b, float 0.0
+  ret float %v
+; CHECK-LABEL: fune
+; CHECK: fcmp [[R:s[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne
+}
+
+define double @doeq(double %a, double %b) #0 {
+  %t = fcmp oeq double %a, 0.0
+  %v = select i1 %t, double 0.0, double %b
+  ret double %v
+; CHECK-LABEL: doeq
+; CHECK: fcmp [[R:d[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq
+}
+
+define double @dueq(double %a, double %b) #0 {
+  %t = fcmp ueq double %a, 0.0
+  %v = select i1 %t, double 0.0, double %b
+  ret double %v
+; CHECK-LABEL: dueq
+; CHECK: fcmp [[R:d[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq
+; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs
+}
+
+define double @done(double %a, double %b) #0 {
+  %t = fcmp one double %a, 0.0
+  %v = select i1 %t, double %b, double 0.0
+  ret double %v
+; CHECK-LABEL: done
+; CHECK: fcmp [[R:d[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], mi
+; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt
+}
+
+define double @dune(double %a, double %b) #0 {
+  %t = fcmp une double %a, 0.0
+  %v = select i1 %t, double %b, double 0.0
+  ret double %v
+; CHECK-LABEL: dune
+; CHECK: fcmp [[R:d[0-9]+]], #0.0
+; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
+
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
index 77bbcddc4926..0827fb8c9e8c 100644
--- a/test/CodeGen/AArch64/flags-multiuse.ll
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
 
 ; LLVM should be able to cope with multiple uses of the same flag-setting
 ; instruction at different points of a routine. Either by rematerializing the
diff --git a/test/CodeGen/AArch64/fptouint-i8-zext.ll b/test/CodeGen/AArch64/fptouint-i8-zext.ll
new file mode 100644
index 000000000000..682683751a8c
--- /dev/null
+++ b/test/CodeGen/AArch64/fptouint-i8-zext.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: float_char_int_func:
+; CHECK: fcvtzs [[A:w[0-9]+]], s0
+; CHECK-NEXT: and w0, [[A]], #0xff
+; CHECK-NEXT: ret
+define i32 @float_char_int_func(float %infloatVal) {
+entry:
+  %conv = fptoui float %infloatVal to i8
+  %conv1 = zext i8 %conv to i32
+  ret i32 %conv1
+}
diff --git a/test/CodeGen/AArch64/gep-nullptr.ll b/test/CodeGen/AArch64/gep-nullptr.ll
index 4c2bc504cd04..e5e359c0b668 100644
--- a/test/CodeGen/AArch64/gep-nullptr.ll
+++ b/test/CodeGen/AArch64/gep-nullptr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -aarch64-gep-opt=true   < %s |FileCheck %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true   < %s |FileCheck %s
 target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
 target triple = "aarch64--linux-gnu"
 
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
index b93f41c07df9..b5a28a18718c 100644
--- a/test/CodeGen/AArch64/global-merge-1.ll
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -1,20 +1,20 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-global-merge -global-merge-on-external -o - | FileCheck %s
 
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-enable-global-merge -global-merge-on-external -o - | FileCheck %s
 
-; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
-; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-enable-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
 
 @m = internal global i32 0, align 4
 @n = internal global i32 0, align 4
 
 define void @f1(i32 %a1, i32 %a2) {
 ;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: adrp	x8, l__MergedGlobals@PAGE
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: add	x8, x8, l__MergedGlobals@PAGEOFF
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals@PAGEOFF
   store i32 %a1, i32* @m, align 4
   store i32 %a2, i32* @n, align 4
   ret void
@@ -26,6 +26,6 @@ define void @f1(i32 %a1, i32 %a2) {
 ;CHECK: m = .L_MergedGlobals
 ;CHECK: n = .L_MergedGlobals+4
 
-;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,8,3 ; @_MergedGlobals
+;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals
 ;CHECK-APPLE-IOS-NOT: _m = l__MergedGlobals
 ;CHECK-APPLE-IOS-NOT: _n = l__MergedGlobals+4
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
index 53bed1d9bc09..6cd3f5580438 100644
--- a/test/CodeGen/AArch64/global-merge-2.ll
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -1,6 +1,6 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
 
 @x = global i32 0, align 4
 @y = global i32 0, align 4
@@ -9,8 +9,8 @@
 define void @f1(i32 %a1, i32 %a2) {
 ;CHECK-APPLE-IOS-LABEL: _f1:
 ;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: adrp	x8, l__MergedGlobals@PAGE
-;CHECK-APPLE-IOS: add	x8, x8, l__MergedGlobals@PAGEOFF
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
 ;CHECK-APPLE-IOS-NOT: adrp
   store i32 %a1, i32* @x, align 4
   store i32 %a2, i32* @y, align 4
@@ -19,8 +19,8 @@ define void @f1(i32 %a1, i32 %a2) {
 
 define void @g1(i32 %a1, i32 %a2) {
 ;CHECK-APPLE-IOS-LABEL: _g1:
-;CHECK-APPLE-IOS: adrp	x8, l__MergedGlobals@PAGE
-;CHECK-APPLE-IOS: add	x8, x8, l__MergedGlobals@PAGEOFF
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
 ;CHECK-APPLE-IOS-NOT: adrp
   store i32 %a1, i32* @y, align 4
   store i32 %a2, i32* @z, align 4
@@ -41,12 +41,12 @@ define void @g1(i32 %a1, i32 %a2) {
 ;CHECK: z = .L_MergedGlobals+8
 ;CHECK: .size z, 4
 
-;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,12,3
+;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3
 
 ;CHECK-APPLE-IOS: .globl	_x
-;CHECK-APPLE-IOS:  = l__MergedGlobals
+;CHECK-APPLE-IOS:  = __MergedGlobals_x
 ;CHECK-APPLE-IOS: .globl	_y
-;CHECK-APPLE-IOS: _y = l__MergedGlobals+4
+;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4
 ;CHECK-APPLE-IOS: .globl	_z
-;CHECK-APPLE-IOS: _z = l__MergedGlobals+8
+;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8
 ;CHECK-APPLE-IOS: .subsections_via_symbols
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
index 481be4017b00..6418f019f747 100644
--- a/test/CodeGen/AArch64/global-merge-3.ll
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -1,17 +1,17 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-enable-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-enable-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
 
 @x = global [1000 x i32] zeroinitializer, align 1
 @y = global [1000 x i32] zeroinitializer, align 1
 @z = internal global i32 1, align 4
 
 define void @f1(i32 %a1, i32 %a2, i32 %a3) {
-;CHECK-APPLE-IOS: adrp	x8, l__MergedGlobals@PAGE
+;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: add	x8, x8, l__MergedGlobals@PAGEOFF
-;CHECK-APPLE-IOS: adrp	x9, l__MergedGlobals.1@PAGE
-;CHECK-APPLE-IOS: add	x9, x9, l__MergedGlobals.1@PAGEOFF
+;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS: adrp	x9, __MergedGlobals_y@PAGE
+;CHECK-APPLE-IOS: add	x9, x9, __MergedGlobals_y@PAGEOFF
   %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
   %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
   store i32 %a1, i32* %x3, align 4
@@ -30,11 +30,11 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
 ;CHECK: .comm	.L_MergedGlobals.1,4000,16
 
 ;CHECK-APPLE-IOS: .p2align	4
-;CHECK-APPLE-IOS:  l__MergedGlobals:
+;CHECK-APPLE-IOS:  __MergedGlobals_x:
 ;CHECK-APPLE-IOS: .long 1
 ;CHECK-APPLE-IOS: .space	4000
 
-;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals.1,4000,4
+;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4
 
 ;CHECK: z = .L_MergedGlobals
 ;CHECK:	.globl	x
@@ -44,8 +44,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
 ;CHECK: y = .L_MergedGlobals.1
 ;CHECK: .size y, 4000
 
-;CHECK-APPLE-IOS-NOT: _z = l__MergedGlobals
+;CHECK-APPLE-IOS-NOT: _z = __MergedGlobals_x
 ;CHECK-APPLE-IOS:.globl	_x
-;CHECK-APPLE-IOS: _x = l__MergedGlobals+4
+;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4
 ;CHECK-APPLE-IOS:.globl	_y
-;CHECK-APPLE-IOS: _y = l__MergedGlobals.1
+;CHECK-APPLE-IOS: _y = __MergedGlobals_y
diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll
index a5109f6e8ea5..036b8910d66c 100644
--- a/test/CodeGen/AArch64/global-merge-4.ll
+++ b/test/CodeGen/AArch64/global-merge-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-enable-global-merge -o - | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
 target triple = "arm64-apple-ios7.0.0"
diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll
index 434c787b28da..86104b7285cf 100644
--- a/test/CodeGen/AArch64/global-merge-group-by-use.ll
+++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
-; RUN:   -aarch64-global-merge -global-merge-group-by-use -global-merge-ignore-single-use=false \
-; RUN:   %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false \
+; RUN:   -aarch64-enable-collect-loh=false -aarch64-enable-global-merge \
+; RUN:   -global-merge-group-by-use -global-merge-ignore-single-use=false %s \
+; RUN:   -o - | FileCheck %s
 
 ; We assume that globals of the same size aren't reordered inside a set.
 
@@ -12,7 +13,7 @@
 
 ; CHECK-LABEL: f1:
 define void @f1(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET1:l__MergedGlobals.[0-9]*]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET1:__MergedGlobals.[0-9]*]]@PAGE
 ; CHECK-NEXT: add x8, x8, [[SET1]]@PAGEOFF
 ; CHECK-NEXT: stp w0, w1, [x8]
 ; CHECK-NEXT: ret
@@ -27,7 +28,7 @@ define void @f1(i32 %a1, i32 %a2) #0 {
 
 ; CHECK-LABEL: f2:
 define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
-; CHECK-NEXT: adrp x8, [[SET2:l__MergedGlobals.[0-9]*]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET2:__MergedGlobals.[0-9]*]]@PAGE
 ; CHECK-NEXT: add x8, x8, [[SET2]]@PAGEOFF
 ; CHECK-NEXT: stp w0, w1, [x8]
 ; CHECK-NEXT: str w2, [x8, #8]
@@ -48,7 +49,7 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) #0 {
 ; CHECK-NEXT: adrp x8, _m3@PAGE
-; CHECK-NEXT: adrp x9, [[SET3:l__MergedGlobals[0-9]*]]@PAGE
+; CHECK-NEXT: adrp x9, [[SET3:__MergedGlobals[0-9]*]]@PAGE
 ; CHECK-NEXT: str w0, [x8, _m3@PAGEOFF]
 ; CHECK-NEXT: str w1, [x9, [[SET3]]@PAGEOFF]
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
index 399438925771..1c1b4f6b0452 100644
--- a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
-; RUN:   -O1 -global-merge-group-by-use -global-merge-ignore-single-use \
-; RUN:   %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false \
+; RUN:   -aarch64-enable-collect-loh=false -O1 -global-merge-group-by-use \
+; RUN:   -global-merge-ignore-single-use %s -o - | FileCheck %s
 
 ; Check that, at -O1, we only merge globals used in minsize functions.
 ; We assume that globals of the same size aren't reordered inside a set.
@@ -11,7 +11,7 @@
 
 ; CHECK-LABEL: f1:
 define void @f1(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
 ; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
 ; CHECK-NEXT: stp w0, w1, [x8]
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
index c3756a85feff..97e283c972a5 100644
--- a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
-; RUN:   -aarch64-global-merge -global-merge-group-by-use -global-merge-ignore-single-use \
-; RUN:   %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false \
+; RUN:   -aarch64-enable-collect-loh=false -aarch64-enable-global-merge \
+; RUN:   -global-merge-group-by-use -global-merge-ignore-single-use %s -o - \
+; RUN:   | FileCheck %s
 
 ; We assume that globals of the same size aren't reordered inside a set.
 
@@ -10,7 +11,7 @@
 
 ; CHECK-LABEL: f1:
 define void @f1(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
 ; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
 ; CHECK-NEXT: stp w0, w1, [x8]
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index 16682e92c17d..d6a7fceac84d 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
-; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
diff --git a/test/CodeGen/AArch64/large_shift.ll b/test/CodeGen/AArch64/large_shift.ll
index f72c97d25aa3..e0ba5015f576 100644
--- a/test/CodeGen/AArch64/large_shift.ll
+++ b/test/CodeGen/AArch64/large_shift.ll
@@ -1,5 +1,4 @@
-; RUN: llc -march=aarch64 -o - %s
-target triple = "arm64-unknown-unknown"
+; RUN: llc -mtriple=arm64-unknown-unknown -o - %s
 
 ; Make sure we don't run into an assert in the aarch64 code selection when
 ; DAGCombining fails.
diff --git a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
index f65694ab80a1..35117a147eeb 100644
--- a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
+++ b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=aarch64 -aarch64-neon-syntax=apple -aarch64-stp-suppress=false -verify-machineinstrs -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple -aarch64-enable-stp-suppress=false -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
 ; CHECK-LABEL: test_strd_sturd:
 ; CHECK-NEXT: stp d0, d1, [x0, #-8]
diff --git a/test/CodeGen/AArch64/ldst-opt-dbg-limit.mir b/test/CodeGen/AArch64/ldst-opt-dbg-limit.mir
new file mode 100644
index 000000000000..45542cae98fa
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-opt-dbg-limit.mir
@@ -0,0 +1,133 @@
+# RUN: llc -run-pass=aarch64-ldst-opt %s -o - 2>&1 | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--linux-gnu"
+
+  ; Function Attrs: nounwind
+  define i16 @promote-load-from-store(i32* %dst, i32 %x) #0 {
+    store i32 %x, i32* %dst
+    %dst16 = bitcast i32* %dst to i16*
+    %dst1 = getelementptr inbounds i16, i16* %dst16, i32 1
+    %x16 = load i16, i16* %dst1
+    ret i16 %x16
+  }
+  
+  ; Function Attrs: nounwind
+  define void @store-pair(i32* %dst, i32 %x, i32 %y) #0 {
+    %dst01 = bitcast i32* %dst to i32*
+    %dst1 = getelementptr inbounds i32, i32* %dst, i32 1
+    store i32 %x, i32* %dst01
+    store i32 %x, i32* %dst1
+    ret void
+  }
+  
+  attributes #0 = { nounwind }
+
+...
+---
+name:            promote-load-from-store
+alignment:       2
+exposesReturnsTwice: false
+tracksRegLiveness: true
+liveins:         
+  - { reg: '%x0' }
+  - { reg: '%w1' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %w1, %x0, %lr
+  
+    STRWui killed %w1, %x0, 0 :: (store 4 into %ir.dst)
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    %w0 = LDRHHui killed %x0, 1 :: (load 2 from %ir.dst1)
+    RET %lr, implicit %w0
+
+...
+# CHECK-LABEL: name:            promote-load-from-store
+# CHECK: STRWui %w1
+# CHECK: UBFMWri %w1
+---
+name:            store-pair
+alignment:       2
+exposesReturnsTwice: false
+tracksRegLiveness: true
+liveins:         
+  - { reg: '%x0' }
+  - { reg: '%w1' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %w1, %x0, %lr
+  
+    STRWui %w1, %x0, 0 :: (store 4 into %ir.dst01)
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    CFI_INSTRUCTION 0
+    STRWui killed %w1, killed %x0, 1 :: (store 4 into %ir.dst1)
+    RET %lr
+
+...
+# CHECK-LABEL: name:            store-pair
+# CHECK: STPWi
diff --git a/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir b/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir
new file mode 100644
index 000000000000..75ad849e4f36
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir
@@ -0,0 +1,27 @@
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt  -verify-machineinstrs  -o - %s | FileCheck %s
+
+--- |
+  define i1 @no-clobber-zr(i64* %p, i64 %x) { ret i1 0 }
+...
+---
+# Check that write of xzr doesn't inhibit pairing of xzr stores since
+# it isn't actually clobbered.  Written as a MIR test to avoid
+# schedulers reordering instructions such that SUBS doesn't appear
+# between stores.
+# CHECK-LABEL: name: no-clobber-zr
+# CHECK: STPXi %xzr, %xzr, %x0, 0
+name: no-clobber-zr
+body: |
+  bb.0:
+    liveins: %x0,  %x1
+    STRXui %xzr, %x0, 0 :: (store 8 into %ir.p)
+    dead %xzr = SUBSXri killed %x1, 0, 0, implicit-def %nzcv
+    %w8 = CSINCWr %wzr, %wzr, 1, implicit killed %nzcv
+    STRXui %xzr, killed %x0, 1 :: (store 8 into %ir.p)
+    %w0 = ORRWrs %wzr, killed %w8, 0
+    RET %lr, implicit %w0
+...
+
+
+
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index a7b6399b7cd1..81e4b19e6eea 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck %s
 
 ; This file contains tests for the AArch64 load/store optimizer.
 
@@ -1333,3 +1333,225 @@ for.body:
 end:
   ret void
 }
+
+; DAGCombiner::MergeConsecutiveStores merges this into a vector store,
+; replaceZeroVectorStore should split the vector store back into
+; scalar stores which should get merged by AArch64LoadStoreOptimizer.
+define void @merge_zr32(i32* %p) {
+; CHECK-LABEL: merge_zr32:
+; CHECK: // %entry
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr i32, i32* %p, i32 1
+  store i32 0, i32* %p1
+  ret void
+}
+
+; Same as merge_zr32 but the merged stores should also get paired.
+define void @merge_zr32_2(i32* %p) {
+; CHECK-LABEL: merge_zr32_2:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr i32, i32* %p, i32 1
+  store i32 0, i32* %p1
+  %p2 = getelementptr i32, i32* %p, i64 2
+  store i32 0, i32* %p2
+  %p3 = getelementptr i32, i32* %p, i64 3
+  store i32 0, i32* %p3
+  ret void
+}
+
+; Like merge_zr32_2, but checking the largest allowed stp immediate offset.
+define void @merge_zr32_2_offset(i32* %p) {
+; CHECK-LABEL: merge_zr32_2_offset:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504]
+; CHECK-NEXT: ret
+entry:
+  %p0 = getelementptr i32, i32* %p, i32 126
+  store i32 0, i32* %p0
+  %p1 = getelementptr i32, i32* %p, i32 127
+  store i32 0, i32* %p1
+  %p2 = getelementptr i32, i32* %p, i64 128
+  store i32 0, i32* %p2
+  %p3 = getelementptr i32, i32* %p, i64 129
+  store i32 0, i32* %p3
+  ret void
+}
+
+; Like merge_zr32, but replaceZeroVectorStore should not split this
+; vector store since the address offset is too large for the stp
+; instruction.
+define void @no_merge_zr32_2_offset(i32* %p) {
+; CHECK-LABEL: no_merge_zr32_2_offset:
+; CHECK: // %entry
+; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; CHECK-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096]
+; CHECK-NEXT: ret
+entry:
+  %p0 = getelementptr i32, i32* %p, i32 1024
+  store i32 0, i32* %p0
+  %p1 = getelementptr i32, i32* %p, i32 1025
+  store i32 0, i32* %p1
+  %p2 = getelementptr i32, i32* %p, i64 1026
+  store i32 0, i32* %p2
+  %p3 = getelementptr i32, i32* %p, i64 1027
+  store i32 0, i32* %p3
+  ret void
+}
+
+; Like merge_zr32, but replaceZeroVectorStore should not split the
+; vector store since the zero constant vector has multiple uses, so we
+; err on the side that allows for stp q instruction generation.
+define void @merge_zr32_3(i32* %p) {
+; CHECK-LABEL: merge_zr32_3:
+; CHECK: // %entry
+; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr i32, i32* %p, i32 1
+  store i32 0, i32* %p1
+  %p2 = getelementptr i32, i32* %p, i64 2
+  store i32 0, i32* %p2
+  %p3 = getelementptr i32, i32* %p, i64 3
+  store i32 0, i32* %p3
+  %p4 = getelementptr i32, i32* %p, i64 4
+  store i32 0, i32* %p4
+  %p5 = getelementptr i32, i32* %p, i64 5
+  store i32 0, i32* %p5
+  %p6 = getelementptr i32, i32* %p, i64 6
+  store i32 0, i32* %p6
+  %p7 = getelementptr i32, i32* %p, i64 7
+  store i32 0, i32* %p7
+  ret void
+}
+
+; Like merge_zr32, but with 2-vector type.
+define void @merge_zr32_2vec(<2 x i32>* %p) {
+; CHECK-LABEL: merge_zr32_2vec:
+; CHECK: // %entry
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store <2 x i32> zeroinitializer, <2 x i32>* %p
+  ret void
+}
+
+; Like merge_zr32, but with 3-vector type.
+define void @merge_zr32_3vec(<3 x i32>* %p) {
+; CHECK-LABEL: merge_zr32_3vec:
+; CHECK: // %entry
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: str wzr, [x{{[0-9]+}}, #8]
+; CHECK-NEXT: ret
+entry:
+  store <3 x i32> zeroinitializer, <3 x i32>* %p
+  ret void
+}
+
+; Like merge_zr32, but with 4-vector type.
+define void @merge_zr32_4vec(<4 x i32>* %p) {
+; CHECK-LABEL: merge_zr32_4vec:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store <4 x i32> zeroinitializer, <4 x i32>* %p
+  ret void
+}
+
+; Like merge_zr32, but with 2-vector float type.
+define void @merge_zr32_2vecf(<2 x float>* %p) {
+; CHECK-LABEL: merge_zr32_2vecf:
+; CHECK: // %entry
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store <2 x float> zeroinitializer, <2 x float>* %p
+  ret void
+}
+
+; Like merge_zr32, but with 4-vector float type.
+define void @merge_zr32_4vecf(<4 x float>* %p) {
+; CHECK-LABEL: merge_zr32_4vecf:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store <4 x float> zeroinitializer, <4 x float>* %p
+  ret void
+}
+
+; Similar to merge_zr32, but for 64-bit values.
+define void @merge_zr64(i64* %p) {
+; CHECK-LABEL: merge_zr64:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store i64 0, i64* %p
+  %p1 = getelementptr i64, i64* %p, i64 1
+  store i64 0, i64* %p1
+  ret void
+}
+
+; Similar to merge_zr32_3, replaceZeroVectorStore should not split the
+; vector store since the zero constant vector has multiple uses.
+define void @merge_zr64_2(i64* %p) {
+; CHECK-LABEL: merge_zr64_2:
+; CHECK: // %entry
+; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store i64 0, i64* %p
+  %p1 = getelementptr i64, i64* %p, i64 1
+  store i64 0, i64* %p1
+  %p2 = getelementptr i64, i64* %p, i64 2
+  store i64 0, i64* %p2
+  %p3 = getelementptr i64, i64* %p, i64 3
+  store i64 0, i64* %p3
+  ret void
+}
+
+; Like merge_zr64, but with 2-vector double type.
+define void @merge_zr64_2vecd(<2 x double>* %p) {
+; CHECK-LABEL: merge_zr64_2vecd:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store <2 x double> zeroinitializer, <2 x double>* %p
+  ret void
+}
+
+; Like merge_zr64, but with 3-vector i64 type.
+define void @merge_zr64_3vec(<3 x i64>* %p) {
+; CHECK-LABEL: merge_zr64_3vec:
+; CHECK: // %entry
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
+; CHECK-NEXT: ret
+entry:
+  store <3 x i64> zeroinitializer, <3 x i64>* %p
+  ret void
+}
+
+; Like merge_zr64_2, but with 4-vector double type.
+define void @merge_zr64_4vecd(<4 x double>* %p) {
+; CHECK-LABEL: merge_zr64_4vecd:
+; CHECK: // %entry
+; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000
+; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+entry:
+  store <4 x double> zeroinitializer, <4 x double>* %p
+  ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/test/CodeGen/AArch64/ldst-paired-aliasing.ll
index 035e911b3c76..9c698b5fdcc6 100644
--- a/test/CodeGen/AArch64/ldst-paired-aliasing.ll
+++ b/test/CodeGen/AArch64/ldst-paired-aliasing.ll
@@ -10,11 +10,11 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
 define i32 @main() local_unnamed_addr #1 {
 ; Make sure the stores happen in the correct order (the exact instructions could change).
 ; CHECK-LABEL: main:
+; CHECK: stp xzr, xzr, [sp, #72]
+; CHECK: str w9, [sp, #80]
 ; CHECK: str q0, [sp, #48]
 ; CHECK: ldr w8, [sp, #48]
-; CHECK: stur q1, [sp, #72]
 ; CHECK: str q0, [sp, #64]
-; CHECK: str w9, [sp, #80]
 
 for.body.lr.ph.i.i.i.i.i.i63:
   %b1 = alloca [10 x i32], align 16
diff --git a/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll b/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll
index b785a8f045f4..a96a3c5f4881 100644
--- a/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll
+++ b/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 -mcpu=bogus -o - %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mcpu=bogus
 
 ; Fix the bug in PR20557. Set mcpu to a bogus name, llc will crash in type
 ; legalization.
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index f4f77c5aa312..7184443994b6 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -1,8 +1,2 @@
-import re
-
 if not 'AArch64' in config.root.targets:
     config.unsupported = True
-
-# For now we don't test arm64-win32.
-if re.search(r'cygwin|mingw32|win32|windows-gnu|windows-msvc', config.target_triple):
-    config.unsupported = True
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
index 6b3246d1db8b..1c15f1521c56 100644
--- a/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -198,7 +198,7 @@ define void @flag_setting() {
 ; CHECK: b.gt .L
   %simple_and = and i64 %val1, %val2
   %tst1 = icmp sgt i64 %simple_and, 0
-  br i1 %tst1, label %ret, label %test2
+  br i1 %tst1, label %ret, label %test2, !prof !1
 
 test2:
 ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
@@ -206,7 +206,7 @@ test2:
   %shifted_op = shl i64 %val2, 63
   %shifted_and = and i64 %val1, %shifted_op
   %tst2 = icmp slt i64 %shifted_and, 0
-  br i1 %tst2, label %ret, label %test3
+  br i1 %tst2, label %ret, label %test3, !prof !1
 
 test3:
 ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12
@@ -214,7 +214,7 @@ test3:
   %asr_op = ashr i64 %val2, 12
   %asr_and = and i64 %asr_op, %val1
   %tst3 = icmp sgt i64 %asr_and, 0
-  br i1 %tst3, label %ret, label %other_exit
+  br i1 %tst3, label %ret, label %other_exit, !prof !1
 
 other_exit:
   store volatile i64 %val1, i64* @var1_64
@@ -222,3 +222,5 @@ other_exit:
 ret:
   ret void
 }
+
+!1 = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/CodeGen/AArch64/lower-range-metadata-func-call.ll b/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
index fd4b2f5ba305..4075db10c42b 100644
--- a/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
+++ b/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 ; and can be eliminated
 ; CHECK-LABEL: {{^}}test_call_known_max_range:
diff --git a/test/CodeGen/AArch64/machine-combiner-madd.ll b/test/CodeGen/AArch64/machine-combiner-madd.ll
new file mode 100644
index 000000000000..ea3113789461
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-combiner-madd.ll
@@ -0,0 +1,40 @@
+; Test all AArch64 subarches with scheduling models.
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a73 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cyclone    < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m1  < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m2  < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=kryo       < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=vulcan     < %s | FileCheck %s
+
+; Make sure that the machine combiner fuses the multiply-add in the addressing
+; mode of the load.
+
+; CHECK-LABEL: fun:
+; CHECK-NOT: mul
+; CHECK:     madd
+; CHECK-NOT: mul
+
+%class.D = type { %class.basic_string.base, [4 x i8] }
+%class.basic_string.base = type <{ i64, i64, i32 }>
+@a = global %class.D* zeroinitializer, align 8
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+define internal void @fun() section ".text.startup" {
+entry:
+  %tmp.i.i = alloca %class.D, align 8
+  %y = bitcast %class.D* %tmp.i.i to i8*
+  br label %loop
+loop:
+  %conv11.i.i = phi i64 [ 0, %entry ], [ %inc.i.i, %loop ]
+  %i = phi i64 [ undef, %entry ], [ %inc.i.i, %loop ]
+  %x = load %class.D*, %class.D** getelementptr inbounds (%class.D*, %class.D** @a, i64 0), align 8
+  %arrayidx.i.i.i = getelementptr inbounds %class.D, %class.D* %x, i64 %conv11.i.i
+  %d = bitcast %class.D* %arrayidx.i.i.i to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %y, i8* %d, i64 24, i32 8, i1 false)
+  %inc.i.i = add i64 %i, 1
+  %cmp.i.i = icmp slt i64 %inc.i.i, 0
+  br i1 %cmp.i.i, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/machine-dead-copy.mir b/test/CodeGen/AArch64/machine-dead-copy.mir
new file mode 100644
index 000000000000..cb552e5cab3d
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-dead-copy.mir
@@ -0,0 +1,67 @@
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-cp  -verify-machineinstrs  -o - %s | FileCheck %s
+
+--- |
+  define i32 @copyprop1(i32 %a, i32 %b) { ret i32 %a }
+  define i32 @copyprop2(i32 %a, i32 %b) { ret i32 %a }
+  define i32 @copyprop3(i32 %a, i32 %b) { ret i32 %a }
+  define i32 @copyprop4(i32 %a, i32 %b) { ret i32 %a }
+  declare i32 @foo(i32)
+...
+---
+# The first copy is a dead copy which is not used.
+# CHECK-LABEL: name: copyprop1
+# CHECK: bb.0:
+# CHECK-NOT: %w20 = COPY
+name: copyprop1
+body: |
+  bb.0:
+    liveins: %w0,  %w1
+    %w20 = COPY %w1
+    BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0
+    RET_ReallyLR implicit %w0
+...
+---
+# The first copy is not a dead copy, since it is used by the second copy after
+# the call.
+# CHECK-LABEL: name: copyprop2
+# CHECK: bb.0:
+# CHECK: %w20 = COPY
+name: copyprop2
+body: |
+  bb.0:
+    liveins: %w0,  %w1
+    %w20 = COPY %w1
+    BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0
+    %w0 = COPY %w20
+    RET_ReallyLR implicit %w0
+...
+---
+# Both the first and second copy are dead copies which are not used.
+# CHECK-LABEL: name: copyprop3
+# CHECK: bb.0:
+# CHECK-NOT: COPY
+name: copyprop3
+body: |
+  bb.0:
+    liveins: %w0,  %w1
+    %w20 = COPY %w1
+    BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0
+    %w20 = COPY %w0
+    RET_ReallyLR implicit %w0
+...
+# The second copy is removed as a NOP copy, after which the first copy becomes
+# dead and should be removed as well.
+# CHECK-LABEL: name: copyprop4
+# CHECK: bb.0:
+# CHECK-NOT: COPY
+name: copyprop4
+body: |
+  bb.0:
+    liveins: %w0,  %w1
+    %w20 = COPY %w0
+    %w0 = COPY %w20
+    BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0
+    RET_ReallyLR implicit %w0
+...
+
diff --git a/test/CodeGen/AArch64/machine-scheduler.mir b/test/CodeGen/AArch64/machine-scheduler.mir
new file mode 100644
index 000000000000..e7e0dda53c57
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-scheduler.mir
@@ -0,0 +1,34 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-scheduler  -verify-machineinstrs  -o - %s | FileCheck %s
+
+--- |
+  define i64 @load_imp-def(i64* nocapture %P, i32 %v) {
+  entry:
+    %0 = bitcast i64* %P to i32*
+    %1 = load i32, i32* %0
+    %conv = zext i32 %1 to i64
+    %arrayidx19 = getelementptr inbounds i64, i64* %P, i64 1
+    %arrayidx1 = bitcast i64* %arrayidx19 to i32*
+    store i32 %v, i32* %arrayidx1
+    %2 = load i64, i64* %arrayidx19
+    %and = and i64 %2, 4294967295
+    %add = add nuw nsw i64 %and, %conv
+    ret i64 %add
+  }
+...
+---
+# CHECK-LABEL: name: load_imp-def
+# CHECK: bb.0.entry:
+# CHECK: LDRWui %x0, 0
+# CHECK: LDRWui %x0, 1
+# CHECK: STRWui %w1, %x0, 2
+name:            load_imp-def
+body:             |
+  bb.0.entry:
+    liveins: %w1, %x0
+    %w8 = LDRWui %x0, 1, implicit-def %x8  :: (load 4 from %ir.0)
+    STRWui killed %w1, %x0, 2 :: (store 4 into %ir.arrayidx1)
+    %w9 = LDRWui killed %x0, 0, implicit-def %x9  :: (load 4 from %ir.arrayidx19, align 8)
+    %x0 = ADDXrr killed %x9, killed %x8
+    RET_ReallyLR implicit %x0
+...
+
diff --git a/test/CodeGen/AArch64/machine-sink-zr.mir b/test/CodeGen/AArch64/machine-sink-zr.mir
new file mode 100644
index 000000000000..535fba0dc63b
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-sink-zr.mir
@@ -0,0 +1,48 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-sink -o - %s | FileCheck %s
+--- |
+  define void @sinkwzr() { ret void }
+...
+---
+name:            sinkwzr
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr32 }
+  - { id: 2, class: gpr32sp }
+  - { id: 3, class: gpr32 }
+  - { id: 4, class: gpr32 }
+body:             |
+  ; Check that WZR copy is sunk into the loop preheader.
+  ; CHECK-LABEL: name: sinkwzr
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NOT: COPY %wzr
+  bb.0:
+    successors: %bb.3, %bb.1
+    liveins: %w0
+
+    %0 = COPY %w0
+    %1 = COPY %wzr
+    CBZW %0, %bb.3
+
+  ; CHECK-LABEL: bb.1:
+  ; CHECK: COPY %wzr
+
+  bb.1:
+    successors: %bb.2
+
+    B %bb.2
+
+  bb.2:
+    successors: %bb.3, %bb.2
+
+    %2 = PHI %0, %bb.1, %4, %bb.2
+    %w0 = COPY %1
+    %3 = SUBSWri %2, 1, 0, implicit-def dead %nzcv
+    %4 = COPY %3
+    CBZW %3, %bb.3
+    B %bb.2
+
+  bb.3:
+    RET_ReallyLR
+
+...
diff --git a/test/CodeGen/AArch64/machine_cse.ll b/test/CodeGen/AArch64/machine_cse.ll
index 032199e62181..e9fa68041d90 100644
--- a/test/CodeGen/AArch64/machine_cse.ll
+++ b/test/CodeGen/AArch64/machine_cse.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
+; -tail-dup-placement causes tail duplication during layout. This breaks the
+; assumptions of the test case as written (specifically, it creates an
+; additional cmp instruction, creating a false positive), so we pass
+; -tail-dup-placement=0 to restore the original behavior
 
 ; marked as external to prevent possible optimizations
 @a = external global i32
diff --git a/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll b/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
index e77824f5f142..f1cd21dce45a 100644
--- a/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
+++ b/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
@@ -5,12 +5,11 @@
 ; The verifier would complain otherwise.
 define i64 @csed-impdef-killflag(i64 %a) {
 ; CHECK-LABEL: csed-impdef-killflag
-; CHECK-DAG:  mov    [[REG0:w[0-9]+]], wzr
 ; CHECK-DAG:  orr    [[REG1:w[0-9]+]], wzr, #0x1
 ; CHECK-DAG:  orr    [[REG2:x[0-9]+]], xzr, #0x2
 ; CHECK-DAG:  orr    [[REG3:x[0-9]+]], xzr, #0x3
-; CHECK:      cmp    x0, #0
-; CHECK-DAG:  csel   w[[SELECT_WREG_1:[0-9]+]], [[REG0]], [[REG1]], ne
+; CHECK-DAG:  cmp    x0, #0
+; CHECK:  csel   w[[SELECT_WREG_1:[0-9]+]], wzr, [[REG1]], ne
 ; CHECK-DAG:  csel   [[SELECT_XREG_2:x[0-9]+]], [[REG2]], [[REG3]], ne
 ; CHECK:      ubfx   [[SELECT_XREG_1:x[0-9]+]], x[[SELECT_WREG_1]], #0, #32
 ; CHECK-NEXT: add    x0, [[SELECT_XREG_2]], [[SELECT_XREG_1]]
diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll
new file mode 100644
index 000000000000..070502052fff
--- /dev/null
+++ b/test/CodeGen/AArch64/max-jump-table.ll
@@ -0,0 +1,93 @@
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40                        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0  < %t
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t
+
+declare void @ext(i32)
+
+define i32 @jt1(i32 %a, i32 %b) {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+    i32 10, label %bb10
+    i32 11, label %bb11
+    i32 12, label %bb12
+    i32 13, label %bb13
+    i32 14, label %bb14
+    i32 15, label %bb15
+    i32 16, label %bb16
+    i32 17, label %bb17
+  ]
+; CHECK-LABEL: function jt1:
+; CHECK-NEXT: Jump Tables:
+; CHECK0-NEXT: jt#0:
+; CHECK0-NOT: jt#1:
+; CHECK4-NEXT: jt#0:
+; CHECK4-SAME: jt#1:
+; CHECK4-SAME: jt#2:
+; CHECK4-SAME: jt#3:
+; CHECK4-NOT: jt#4:
+; CHECK8-NEXT: jt#0:
+; CHECK8-SAME: jt#1:
+; CHECK8-NOT: jt#2:
+; CHECKM1-NEXT: jt#0:
+; CHECKM1-SAME: jt#1
+; CHECKM1-NOT: jt#2:
+; CHEC-NEXT: Function Live Ins:
+
+bb1: tail call void @ext(i32 0) br label %return
+bb2: tail call void @ext(i32 2) br label %return
+bb3: tail call void @ext(i32 4) br label %return
+bb4: tail call void @ext(i32 6) br label %return
+bb5: tail call void @ext(i32 8) br label %return
+bb6: tail call void @ext(i32 10) br label %return
+bb7: tail call void @ext(i32 12) br label %return
+bb8: tail call void @ext(i32 14) br label %return
+bb9: tail call void @ext(i32 16) br label %return
+bb10: tail call void @ext(i32 18) br label %return
+bb11: tail call void @ext(i32 20) br label %return
+bb12: tail call void @ext(i32 22) br label %return
+bb13: tail call void @ext(i32 24) br label %return
+bb14: tail call void @ext(i32 26) br label %return
+bb15: tail call void @ext(i32 28) br label %return
+bb16: tail call void @ext(i32 30) br label %return
+bb17: tail call void @ext(i32 32) br label %return
+
+return: ret i32 %b
+}
+
+define void @jt2(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+
+    i32 14, label %bb5
+    i32 15, label %bb6
+  ]
+; CHECK-LABEL: function jt2:
+; CHECK-NEXT: Jump Tables:
+; CHECK0-NEXT: jt#0:  BB#1 BB#2 BB#3 BB#4 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#5 BB#6{{$}}
+; CHECK4-NEXT: jt#0:  BB#1 BB#2 BB#3 BB#4{{$}}
+; CHECK8-NEXT: jt#0:  BB#1 BB#2 BB#3 BB#4{{$}}
+; CHECKM1-NEXT: jt#0:  BB#1 BB#2 BB#3 BB#4{{$}}
+; CHEC-NEXT: Function Live Ins:
+
+bb1: tail call void @ext(i32 1) br label %return
+bb2: tail call void @ext(i32 2) br label %return
+bb3: tail call void @ext(i32 3) br label %return
+bb4: tail call void @ext(i32 4) br label %return
+bb5: tail call void @ext(i32 5) br label %return
+bb6: tail call void @ext(i32 6) br label %return
+return: ret void
+}
diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll
index 76db2974ab4d..7e6ec36104ab 100644
--- a/test/CodeGen/AArch64/memcpy-f128.ll
+++ b/test/CodeGen/AArch64/memcpy-f128.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
 
 %structA = type { i128 }
 @stubA = internal unnamed_addr constant %structA zeroinitializer, align 8
diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll
index c68cee91a3cf..4f2af9ed7e65 100644
--- a/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53
+; RUN: llc < %s -mcpu cortex-a53 -mtriple=aarch64-eabi | FileCheck %s --check-prefix=A53
 
 ; PR26827 - Merge stores causes wrong dependency.
 %struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* }
diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll
index 981d16f762ff..1d0196ad521d 100644
--- a/test/CodeGen/AArch64/merge-store.ll
+++ b/test/CodeGen/AArch64/merge-store.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-unknown-unknown %s -mcpu=cyclone -o - | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK
-; RUN: llc -march aarch64 %s -mattr=-slow-misaligned-128store -o - | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=-slow-misaligned-128store | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK
 
 @g0 = external global <3 x float>, align 16
 @g1 = external global <3 x float>, align 4
diff --git a/test/CodeGen/AArch64/min-jump-table.ll b/test/CodeGen/AArch64/min-jump-table.ll
new file mode 100644
index 000000000000..80974debc48a
--- /dev/null
+++ b/test/CodeGen/AArch64/min-jump-table.ll
@@ -0,0 +1,79 @@
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=0 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0  < %t
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
+; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
+
+declare void @ext(i32)
+
+define i32 @jt2(i32 %a, i32 %b) {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+  ]
+; CHECK-LABEL: function jt2:
+; CHECK0-NEXT: Jump Tables:
+; CHECK0-NEXT: jt#0:
+; CHECK0-NOT: jt#1:
+; CHECK4-NOT: Jump Tables:
+; CHECK8-NOT: Jump Tables:
+
+bb1: tail call void @ext(i32 0) br label %return
+bb2: tail call void @ext(i32 2) br label %return
+
+return: ret i32 %b
+}
+
+define i32 @jt4(i32 %a, i32 %b) {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+  ]
+; CHECK-LABEL: function jt4:
+; CHECK0-NEXT: Jump Tables:
+; CHECK0-NEXT: jt#0:
+; CHECK0-NOT: jt#1:
+; CHECK4-NEXT: Jump Tables:
+; CHECK4-NEXT: jt#0:
+; CHECK4-NOT: jt#1:
+; CHECK8-NOT: Jump Tables:
+
+bb1: tail call void @ext(i32 0) br label %return
+bb2: tail call void @ext(i32 2) br label %return
+bb3: tail call void @ext(i32 4) br label %return
+bb4: tail call void @ext(i32 6) br label %return
+
+return: ret i32 %b
+}
+
+define i32 @jt8(i32 %a, i32 %b) {
+entry:
+  switch i32 %a, label %return [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+  ]
+; CHECK-LABEL: function jt8:
+; CHECK-NEXT: Jump Tables:
+; CHECK-NEXT: jt#0:
+; CHECK-NOT: jt#1:
+
+bb1: tail call void @ext(i32 0) br label %return
+bb2: tail call void @ext(i32 2) br label %return
+bb3: tail call void @ext(i32 4) br label %return
+bb4: tail call void @ext(i32 6) br label %return
+bb5: tail call void @ext(i32 8) br label %return
+bb6: tail call void @ext(i32 10) br label %return
+bb7: tail call void @ext(i32 12) br label %return
+bb8: tail call void @ext(i32 14) br label %return
+
+return: ret i32 %b
+}
+
diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll
index 0f4c0ac84ce5..d5dd9c757dfd 100644
--- a/test/CodeGen/AArch64/misched-fusion.ll
+++ b/test/CodeGen/AArch64/misched-fusion.ll
@@ -1,4 +1,4 @@
-; RUN: llc -o - %s -mattr=+macroop-fusion,+use-postra-scheduler | FileCheck %s
+; RUN: llc -o - %s -mattr=+arith-cbz-fusion | FileCheck %s
 ; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
 
 target triple = "arm64-apple-ios"
diff --git a/test/CodeGen/AArch64/movimm-wzr.mir b/test/CodeGen/AArch64/movimm-wzr.mir
index d54e7bef54cd..093f85bd9319 100644
--- a/test/CodeGen/AArch64/movimm-wzr.mir
+++ b/test/CodeGen/AArch64/movimm-wzr.mir
@@ -15,11 +15,7 @@
 name:            test_mov_0
 alignment:       2
 exposesReturnsTwice: false
-hasInlineAsm:    false
-allVRegsAllocated: true
-isSSA:           false
 tracksRegLiveness: false
-tracksSubRegLiveness: false
 frameInfo:
   isFrameAddressTaken: false
   isReturnAddressTaken: false
@@ -43,4 +39,4 @@ body:             |
 ...
 
 # CHECK: bb.0
-# CHECK-NEXT: RET %lr
+# CHECK-NEXT: RET undef %lr
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index e93521858a31..4ba4cfab8aeb 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -3,16 +3,18 @@
 
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
-; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
-; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
-; CHECK: mul [[PART2:x[0-9]+]], x1, x2
-; CHECK: mul x0, x0, x2
+; CHECK:       umulh [[HI:x[0-9]+]], x0, x2
+; CHECK:       madd  [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
+; CHECK-DAG:   madd  x1, x1, x2, [[TEMP1]]
+; CHECK-DAG:   mul   x0, x0, x2
+; CHECK-NEXT:  ret
 
 ; CHECK-BE-LABEL: test_128bitmul:
-; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
-; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
-; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
-; CHECK-BE: mul x1, x1, x3
+; CHECK-BE:       umulh [[HI:x[0-9]+]], x1, x3
+; CHECK-BE:       madd  [[TEMP1:x[0-9]+]], x1, x2, [[HI]]
+; CHECK-BE-DAG:   madd  x0, x0, x3, [[TEMP1]]
+; CHECK-BE-DAG:   mul   x1, x1, x3
+; CHECK-BE-NEXT:  ret
 
   %prod = mul i128 %lhs, %rhs
   ret i128 %prod
@@ -25,8 +27,8 @@ define i128 @test_128bitmul_optsize(i128 %lhs, i128 %rhs) optsize {
 ; CHECK-LABEL: test_128bitmul_optsize:
 ; CHECK:       umulh [[HI:x[0-9]+]], x0, x2
 ; CHECK-NEXT:  madd  [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
-; CHECK-NEXT:  madd  x1, x1, x2, [[TEMP1]]
-; CHECK-NEXT:  mul   x0, x0, x2
+; CHECK-DAG:   madd  x1, x1, x2, [[TEMP1]]
+; CHECK-DAG:   mul   x0, x0, x2
 ; CHECK-NEXT:  ret
 
   %prod = mul i128 %lhs, %rhs
@@ -37,8 +39,8 @@ define i128 @test_128bitmul_minsize(i128 %lhs, i128 %rhs) minsize {
 ; CHECK-LABEL: test_128bitmul_minsize:
 ; CHECK:       umulh [[HI:x[0-9]+]], x0, x2
 ; CHECK-NEXT:  madd  [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
-; CHECK-NEXT:  madd  x1, x1, x2, [[TEMP1]]
-; CHECK-NEXT:  mul   x0, x0, x2
+; CHECK-DAG:   madd  x1, x1, x2, [[TEMP1]]
+; CHECK-DAG:   mul   x0, x0, x2
 ; CHECK-NEXT:  ret
 
   %prod = mul i128 %lhs, %rhs
diff --git a/test/CodeGen/AArch64/mul_pow2.ll b/test/CodeGen/AArch64/mul_pow2.ll
index b828223ef1c9..80a7b7200806 100644
--- a/test/CodeGen/AArch64/mul_pow2.ll
+++ b/test/CodeGen/AArch64/mul_pow2.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=aarch64 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi | FileCheck %s
 
 ; Convert mul x, pow2 to shift.
 ; Convert mul x, pow2 +/- 1 to shift + add/sub.
+; Convert mul x, (pow2 + 1) * pow2 to shift + add + shift.
+; Lowering other positive constants is not supported yet.
 
 define i32 @test2(i32 %x) {
 ; CHECK-LABEL: test2
@@ -36,6 +38,122 @@ define i32 @test5(i32 %x) {
   ret i32 %mul
 }
 
+define i32 @test6_32b(i32 %x) {
+; CHECK-LABEL: test6
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
+; CHECK: lsl w0, {{w[0-9]+}}, #1
+
+  %mul = mul nsw i32 %x, 6 
+  ret i32 %mul
+}
+
+define i64 @test6_64b(i64 %x) {
+; CHECK-LABEL: test6_64b
+; CHECK: add {{x[0-9]+}}, x0, x0, lsl #1
+; CHECK: lsl x0, {{x[0-9]+}}, #1
+
+  %mul = mul nsw i64 %x, 6 
+  ret i64 %mul
+}
+
+; A mul that is combined with an add, sub, or s(z)ext is not yet converted to
+; a combination of lsl and add/sub.
+define i64 @test6_umull(i32 %x) {
+; CHECK-LABEL: test6_umull
+; CHECK: umull x0, w0, {{w[0-9]+}} 
+
+  %ext = zext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  ret i64 %mul
+}
+
+define i64 @test6_smull(i32 %x) {
+; CHECK-LABEL: test6_smull
+; CHECK: smull x0, w0, {{w[0-9]+}} 
+
+  %ext = sext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  ret i64 %mul
+}
+
+define i32 @test6_madd(i32 %x, i32 %y) {
+; CHECK-LABEL: test6_madd
+; CHECK: madd w0, w0, {{w[0-9]+}}, w1 
+
+  %mul = mul nsw i32 %x, 6 
+  %add = add i32 %mul, %y
+  ret i32 %add
+}
+
+define i32 @test6_msub(i32 %x, i32 %y) {
+; CHECK-LABEL: test6_msub
+; CHECK: msub w0, w0, {{w[0-9]+}}, w1 
+
+  %mul = mul nsw i32 %x, 6 
+  %sub = sub i32 %y, %mul
+  ret i32 %sub
+}
+
+define i64 @test6_umaddl(i32 %x, i64 %y) {
+; CHECK-LABEL: test6_umaddl
+; CHECK: umaddl x0, w0, {{w[0-9]+}}, x1 
+
+  %ext = zext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  %add = add i64 %mul, %y
+  ret i64 %add
+}
+
+define i64 @test6_smaddl(i32 %x, i64 %y) {
+; CHECK-LABEL: test6_smaddl
+; CHECK: smaddl x0, w0, {{w[0-9]+}}, x1
+
+  %ext = sext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  %add = add i64 %mul, %y
+  ret i64 %add
+}
+
+define i64 @test6_umsubl(i32 %x, i64 %y) {
+; CHECK-LABEL: test6_umsubl
+; CHECK: umsubl x0, w0, {{w[0-9]+}}, x1
+
+  %ext = zext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  %sub = sub i64 %y, %mul
+  ret i64 %sub
+}
+
+define i64 @test6_smsubl(i32 %x, i64 %y) {
+; CHECK-LABEL: test6_smsubl
+; CHECK: smsubl x0, w0, {{w[0-9]+}}, x1 
+
+  %ext = sext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  %sub = sub i64 %y, %mul
+  ret i64 %sub
+}
+
+define i64 @test6_umnegl(i32 %x) {
+; CHECK-LABEL: test6_umnegl
+; CHECK: umnegl x0, w0, {{w[0-9]+}} 
+
+  %ext = zext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @test6_smnegl(i32 %x) {
+; CHECK-LABEL: test6_smnegl
+; CHECK: smnegl x0, w0, {{w[0-9]+}} 
+
+  %ext = sext i32 %x to i64
+  %mul = mul nsw i64 %ext, 6 
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
 define i32 @test7(i32 %x) {
 ; CHECK-LABEL: test7
 ; CHECK: lsl {{w[0-9]+}}, w0, #3
@@ -57,12 +175,72 @@ define i32 @test9(i32 %x) {
 ; CHECK-LABEL: test9
 ; CHECK: add w0, w0, w0, lsl #3
 
-  %mul = mul nsw i32 %x, 9
+  %mul = mul nsw i32 %x, 9 
+  ret i32 %mul
+}
+
+define i32 @test10(i32 %x) {
+; CHECK-LABEL: test10
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
+; CHECK: lsl w0, {{w[0-9]+}}, #1
+
+  %mul = mul nsw i32 %x, 10
+  ret i32 %mul
+}
+
+define i32 @test11(i32 %x) {
+; CHECK-LABEL: test11
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, 11
+  ret i32 %mul
+}
+
+define i32 @test12(i32 %x) {
+; CHECK-LABEL: test12
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
+; CHECK: lsl w0, {{w[0-9]+}}, #2
+
+  %mul = mul nsw i32 %x, 12
+  ret i32 %mul
+}
+
+define i32 @test13(i32 %x) {
+; CHECK-LABEL: test13
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, 13
+  ret i32 %mul
+}
+
+define i32 @test14(i32 %x) {
+; CHECK-LABEL: test14
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, 14 
+  ret i32 %mul
+}
+
+define i32 @test15(i32 %x) {
+; CHECK-LABEL: test15
+; CHECK: lsl {{w[0-9]+}}, w0, #4
+; CHECK: sub w0, {{w[0-9]+}}, w0
+
+  %mul = mul nsw i32 %x, 15
+  ret i32 %mul
+}
+
+define i32 @test16(i32 %x) {
+; CHECK-LABEL: test16
+; CHECK: lsl w0, w0, #4
+
+  %mul = mul nsw i32 %x, 16
   ret i32 %mul
 }
 
 ; Convert mul x, -pow2 to shift.
 ; Convert mul x, -(pow2 +/- 1) to shift + add/sub.
+; Lowering other negative constants is not supported yet.
 
 define i32 @ntest2(i32 %x) {
 ; CHECK-LABEL: ntest2
@@ -96,6 +274,14 @@ define i32 @ntest5(i32 %x) {
   ret i32 %mul
 }
 
+define i32 @ntest6(i32 %x) {
+; CHECK-LABEL: ntest6
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -6
+  ret i32 %mul
+}
+
 define i32 @ntest7(i32 %x) {
 ; CHECK-LABEL: ntest7
 ; CHECK: sub w0, w0, w0, lsl #3
@@ -120,3 +306,58 @@ define i32 @ntest9(i32 %x) {
   %mul = mul nsw i32 %x, -9
   ret i32 %mul
 }
+
+define i32 @ntest10(i32 %x) {
+; CHECK-LABEL: ntest10
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -10
+  ret i32 %mul
+}
+
+define i32 @ntest11(i32 %x) {
+; CHECK-LABEL: ntest11
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -11
+  ret i32 %mul
+}
+
+define i32 @ntest12(i32 %x) {
+; CHECK-LABEL: ntest12
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -12
+  ret i32 %mul
+}
+
+define i32 @ntest13(i32 %x) {
+; CHECK-LABEL: ntest13
+; CHECK: mul w0, w0, {{w[0-9]+}}
+  %mul = mul nsw i32 %x, -13
+  ret i32 %mul
+}
+
+define i32 @ntest14(i32 %x) {
+; CHECK-LABEL: ntest14
+; CHECK: mul w0, w0, {{w[0-9]+}}
+
+  %mul = mul nsw i32 %x, -14
+  ret i32 %mul
+}
+
+define i32 @ntest15(i32 %x) {
+; CHECK-LABEL: ntest15
+; CHECK: sub w0, w0, w0, lsl #4
+
+  %mul = mul nsw i32 %x, -15
+  ret i32 %mul
+}
+
+define i32 @ntest16(i32 %x) {
+; CHECK-LABEL: ntest16
+; CHECK: neg w0, w0, lsl #4
+
+  %mul = mul nsw i32 %x, -16
+  ret i32 %mul
+}
diff --git a/test/CodeGen/AArch64/neg-imm.ll b/test/CodeGen/AArch64/neg-imm.ll
index 375d3dbfd0d5..46bded78cc59 100644
--- a/test/CodeGen/AArch64/neg-imm.ll
+++ b/test/CodeGen/AArch64/neg-imm.ll
@@ -30,9 +30,9 @@ if.then3:
 
 for.inc:
 ; CHECK_LABEL: %for.inc
-; CHECK:  add
-; CHECK-NEXT:  cmp
-; CHECK:  b.le
+; CHECK:  cmp
+; CHECK-NEXT:  add
+; CHECK-NEXT:  b.le
 ; CHECK_LABEL: %for.cond.cleanup
   %inc = add nsw i32 %x.015, 1
   %cmp1 = icmp sgt i32 %x.015, %px
diff --git a/test/CodeGen/AArch64/neon-inline-asm-16-bit-fp.ll b/test/CodeGen/AArch64/neon-inline-asm-16-bit-fp.ll
new file mode 100644
index 000000000000..3656a7879770
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-inline-asm-16-bit-fp.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+; generated from
+; __fp16 test(__fp16 a1, __fp16 a2) {
+;    __fp16 res0;
+;    __asm__("sqrshl %h[__res], %h[__A], %h[__B]"
+;             : [__res] "=w" (res0)
+;             : [__A] "w" (a1), [__B] "w" (a2)
+;             :
+;             );
+;    return res0;
+;}
+
+; Function Attrs: nounwind readnone
+define half @test(half %a1, half %a2) #0 {
+entry:
+  ;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  %0 = tail call half asm "sqrshl ${0:h}, ${1:h}, ${2:h}", "=w,w,w" (half %a1, half %a2) #1
+  ret half %0
+}
diff --git a/test/CodeGen/AArch64/no-quad-ldp-stp.ll b/test/CodeGen/AArch64/no-quad-ldp-stp.ll
index 19d371adbdf0..6324835b322b 100644
--- a/test/CodeGen/AArch64/no-quad-ldp-stp.ll
+++ b/test/CodeGen/AArch64/no-quad-ldp-stp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=aarch64 -mattr=+no-quad-ldst-pairs -verify-machineinstrs -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=aarch64 -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+no-quad-ldst-pairs -verify-machineinstrs -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
 ; CHECK-LABEL: test_nopair_st
 ; CHECK: str
diff --git a/test/CodeGen/AArch64/nzcv-save.ll b/test/CodeGen/AArch64/nzcv-save.ll
index 9329f3962934..2700b1db9dd5 100644
--- a/test/CodeGen/AArch64/nzcv-save.ll
+++ b/test/CodeGen/AArch64/nzcv-save.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-eabi | FileCheck %s
 
 ; CHECK: mrs [[NZCV_SAVE:x[0-9]+]], NZCV
 ; CHECK: msr NZCV, [[NZCV_SAVE]]
diff --git a/test/CodeGen/AArch64/phi-dbg.ll b/test/CodeGen/AArch64/phi-dbg.ll
new file mode 100644
index 000000000000..a1adf0f50d9b
--- /dev/null
+++ b/test/CodeGen/AArch64/phi-dbg.ll
@@ -0,0 +1,75 @@
+; RUN: llc -O0 %s -mtriple=aarch64 -o - | FileCheck %s
+
+; Test that a DEBUG_VALUE node is created for variable c after the phi has been
+; converted to a ldr.  The DEBUG_VALUE must be *after* the ldr and not before it.
+
+; Created from the C code, compiled with -O0 -g and then passed through opt -mem2reg:
+;
+; int func(int a)
+; {
+;         int c = 1;
+;         if (a < 0 ) {
+;                 c = 12;
+;         }
+;         return c;
+; }
+;
+; Function Attrs: nounwind
+define i32 @func(i32) #0 !dbg !8 {
+  call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !12, metadata !13), !dbg !14
+  call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !15, metadata !13), !dbg !16
+  %2 = icmp slt i32 %0, 0, !dbg !17
+  br i1 %2, label %3, label %4, !dbg !19
+
+; <label>:3:                                      ; preds = %1
+  call void @llvm.dbg.value(metadata i32 12, i64 0, metadata !15, metadata !13), !dbg !16
+  br label %4, !dbg !20
+
+; <label>:4:                                      ; preds = %3, %1
+  %.0 = phi i32 [ 12, %3 ], [ 1, %1 ]
+; CHECK: ldr     w[[REG:[0-9]+]], [sp, #8]
+; CHECK-NEXT: .Ltmp
+  call void @llvm.dbg.value(metadata i32 %.0, i64 0, metadata !15, metadata !13), !dbg !16
+; CHECK-NEXT:  //DEBUG_VALUE: func:c <- %W[[REG]]
+  %5 = add nsw i32 %.0, %0, !dbg !22
+  call void @llvm.dbg.value(metadata i32 %5, i64 0, metadata !15, metadata !13), !dbg !16
+  ret i32 %5, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "a.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{!"clang"}
+!8 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !11}
+!11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 1, type: !11)
+!13 = !DIExpression()
+!14 = !DILocation(line: 1, column: 14, scope: !8)
+!15 = !DILocalVariable(name: "c", scope: !8, file: !1, line: 3, type: !11)
+!16 = !DILocation(line: 3, column: 13, scope: !8)
+!17 = !DILocation(line: 4, column: 15, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !8, file: !1, line: 4, column: 13)
+!19 = !DILocation(line: 4, column: 13, scope: !8)
+!20 = !DILocation(line: 6, column: 9, scope: !21)
+!21 = distinct !DILexicalBlock(scope: !18, file: !1, line: 4, column: 21)
+!22 = !DILocation(line: 7, column: 4, scope: !8)
+!23 = !DILocation(line: 8, column: 9, scope: !8)
diff --git a/test/CodeGen/AArch64/postra-mi-sched.ll b/test/CodeGen/AArch64/postra-mi-sched.ll
index 5a407246609e..e7f3f5515a7a 100644
--- a/test/CodeGen/AArch64/postra-mi-sched.ll
+++ b/test/CodeGen/AArch64/postra-mi-sched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -march=aarch64 -mcpu=cortex-a53 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=aarch64-eabi -mcpu=cortex-a53 | FileCheck %s
 
 ; With cortex-a53, each of fmul and fcvt have latency of 6 cycles.  After the
 ; pre-RA MI scheduler, fmul, fcvt and fdiv will be consecutive.  The top-down
diff --git a/test/CodeGen/AArch64/recp-fastmath.ll b/test/CodeGen/AArch64/recp-fastmath.ll
index 710739b2cc5f..38e0fb360e49 100644
--- a/test/CodeGen/AArch64/recp-fastmath.ll
+++ b/test/CodeGen/AArch64/recp-fastmath.ll
@@ -1,79 +1,150 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!div,!vec-div | FileCheck %s --check-prefix=FAULT
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=div,vec-div   | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon | FileCheck %s
 
-define float @frecp(float %x) #0 {
+define float @frecp0(float %x) #0 {
   %div = fdiv fast float 1.0, %x
   ret float %div
 
-; FAULT-LABEL: frecp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
+; CHECK-LABEL: frecp0:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: fdiv
+}
+
+define float @frecp1(float %x) #1 {
+  %div = fdiv fast float 1.0, %x
+  ret float %div
+
+; CHECK-LABEL: frecp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[R:s[0-7]]]
+; CHECK-NEXT: frecps {{s[0-7](, s[0-7])?}}, [[R]]
+}
+
+define <2 x float> @f2recp0(<2 x float> %x) #0 {
+  %div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
+  ret <2 x float> %div
 
-; CHECK-LABEL: frecp:
+; CHECK-LABEL: f2recp0:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: frecpe
 ; CHECK-NEXT: fmov
+; CHECK-NEXT: fdiv
 }
 
-define <2 x float> @f2recp(<2 x float> %x) #0 {
+define <2 x float> @f2recp1(<2 x float> %x) #1 { ; #1 enables reciprocal estimates for div/vec-div
   %div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
   ret <2 x float> %div
 
-; FAULT-LABEL: f2recp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
+; CHECK-LABEL: f2recp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[R:v[0-7]\.2s]]
+; CHECK-NEXT: frecps {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[R]]
+}
+
+define <4 x float> @f4recp0(<4 x float> %x) #0 {
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <4 x float> %div
 
-; CHECK-LABEL: f2recp:
+; CHECK-LABEL: f4recp0:
 ; CHECK-NEXT: BB#0
 ; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
+; CHECK-NEXT: fdiv
 }
 
-define <4 x float> @f4recp(<4 x float> %x) #0 {
+define <4 x float> @f4recp1(<4 x float> %x) #1 { ; #1 enables reciprocal estimates for div/vec-div
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <4 x float> %div
 
-; FAULT-LABEL: f4recp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
+; CHECK-LABEL: f4recp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]]
+; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[R]]
+}
+
+define <8 x float> @f8recp0(<8 x float> %x) #0 {
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <8 x float> %div
 
-; CHECK-LABEL: f4recp:
+; CHECK-LABEL: f8recp0:
 ; CHECK-NEXT: BB#0
 ; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
+; CHECK-NEXT: fdiv
+; CHECK-NEXT: fdiv
 }
 
-define double @drecp(double %x) #0 {
+define <8 x float> @f8recp1(<8 x float> %x) #1 { ; #1 enables reciprocal estimates for div/vec-div
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <8 x float> %div
+
+; CHECK-LABEL: f8recp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[RA:v[0-7]\.4s]]
+; CHECK-NEXT: frecpe [[RB:v[0-7]\.4s]]
+; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RA]]
+; CHECK: frecps {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+}
+
+define double @drecp0(double %x) #0 {
   %div = fdiv fast double 1.0, %x
   ret double %div
 
-; FAULT-LABEL: drecp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
+; CHECK-LABEL: drecp0:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: fdiv
+}
 
-; CHECK-LABEL: drecp:
+define double @drecp1(double %x) #1 {
+  %div = fdiv fast double 1.0, %x
+  ret double %div
+
+; CHECK-LABEL: drecp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[R:d[0-7]]]
+; CHECK-NEXT: frecps {{d[0-7](, d[0-7])?}}, [[R]]
+}
+
+define <2 x double> @d2recp0(<2 x double> %x) #0 {
+  %div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
+  ret <2 x double> %div
+
+; CHECK-LABEL: d2recp0:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: frecpe
 ; CHECK-NEXT: fmov
+; CHECK-NEXT: fdiv
 }
 
-define <2 x double> @d2recp(<2 x double> %x) #0 {
+define <2 x double> @d2recp1(<2 x double> %x) #1 { ; #1 enables reciprocal estimates for div/vec-div
   %div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
   ret <2 x double> %div
 
-; FAULT-LABEL: d2recp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
+; CHECK-LABEL: d2recp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]]
+; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[R]]
+}
+
+define <4 x double> @d4recp0(<4 x double> %x) #0 {
+  %div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %x
+  ret <4 x double> %div
 
-; CHECK-LABEL: d2recp:
+; CHECK-LABEL: d4recp0:
 ; CHECK-NEXT: BB#0
 ; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
+; CHECK-NEXT: fdiv
+; CHECK-NEXT: fdiv
+}
+
+define <4 x double> @d4recp1(<4 x double> %x) #1 { ; #1 enables reciprocal estimates for div/vec-div
+  %div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %x
+  ret <4 x double> %div
+
+; CHECK-LABEL: d4recp1:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe [[RA:v[0-7]\.2d]]
+; CHECK-NEXT: frecpe [[RB:v[0-7]\.2d]]
+; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RA]]
+; CHECK: frecps {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
 }
 
 attributes #0 = { nounwind "unsafe-fp-math"="true" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" "reciprocal-estimates"="div,vec-div" }
diff --git a/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll b/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
new file mode 100644
index 000000000000..27a33a2337e3
--- /dev/null
+++ b/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | FileCheck %s
+; Make sure we don't crash in AArch64RedundantCopyElimination when a
+; MachineBasicBlock is empty.  PR29035.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare i8* @bar()
+
+; CHECK-LABEL: foo:
+; CHECK: tbz
+; CHECK: orr
+; CHECK: ret
+; CHECK: bl bar
+; CHECK: cbnz
+; CHECK: ret
+define i1 @foo(i1 %start) {
+entry:
+  br i1 %start, label %cleanup, label %if.end
+
+if.end:                                           ; preds = %if.end, %entry
+  %call = tail call i8* @bar()
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cleanup, label %if.end
+
+cleanup:                                          ; preds = %if.end, %entry
+  %retval.0 = phi i1 [ true, %entry ], [ false, %if.end ]
+  ret i1 %retval.0
+}
diff --git a/test/CodeGen/AArch64/regcoal-physreg.mir b/test/CodeGen/AArch64/regcoal-physreg.mir
new file mode 100644
index 000000000000..e1c4d703003e
--- /dev/null
+++ b/test/CodeGen/AArch64/regcoal-physreg.mir
@@ -0,0 +1,67 @@
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=simple-register-coalescing %s -o - | FileCheck %s
+--- |
+  define void @func() { ret void }
+...
+---
+# Check coalescing of COPYs from reserved physregs.
+# CHECK-LABEL: name: func
+name: func
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: gpr64 }
+  - { id: 3, class: gpr32 }
+  - { id: 4, class: gpr64 }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: xseqpairsclass }
+body: |
+  bb.0:
+    ; We usually should not coalesce copies from allocatable physregs.
+    ; CHECK: %0 = COPY %w7
+    ; CHECK: STRWui %0, %x1, 0
+    %0 = COPY %w7
+    STRWui %0, %x1, 0
+
+    ; It is fine to coalesce copies from reserved physregs
+    ; CHECK-NOT: COPY
+    ; CHECK: STRXui %fp, %x1, 0
+    %1 = COPY %fp
+    STRXui %1, %x1, 0
+
+    ; It is not fine to coalesce copies from reserved physregs when they are
+    ; clobbered.
+    ; CHECK: %2 = COPY %fp
+    ; CHECK: STRXui %2, %x1, 0
+    %2 = COPY %fp
+    %fp = SUBXri %fp, 4, 0
+    STRXui %2, %x1, 0
+
+    ; It is fine to coalesce copies from constant physregs even when they are
+    ; clobbered.
+    ; CHECK-NOT: COPY
+    ; CHECK: STRWui %wzr, %x1
+    %3 = COPY %wzr
+    dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    STRWui %3, %x1, 0
+
+    ; It is fine to coalesce copies from constant physregs even when they are
+    ; clobbered.
+    ; CHECK-NOT: COPY
+    ; CHECK: STRXui %xzr, %x1
+    %4 = COPY %xzr
+    dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    STRXui %4, %x1, 0
+
+    ; Coalescing COPYs into constant physregs.
+    ; CHECK: %wzr = SUBSWri %w1, 0, 0
+    %5 = SUBSWri %w1, 0, 0, implicit-def %nzcv
+    %wzr = COPY %5
+
+    ; Only coalesce when the source register is reserved as a whole (this is
+    ; a limitation of the current code which cannot update liveness information
+    ; of the non-reserved part).
+    ; CHECK: %6 = COPY %xzr_x0
+    ; CHECK: HINT 0, implicit %6
+    %6 = COPY %xzr_x0
+    HINT 0, implicit %6
+...
diff --git a/test/CodeGen/AArch64/rem_crash.ll b/test/CodeGen/AArch64/rem_crash.ll
index 71f1a80e24e2..f9cf6d58370c 100644
--- a/test/CodeGen/AArch64/rem_crash.ll
+++ b/test/CodeGen/AArch64/rem_crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=aarch64
+; RUN: llc < %s -mtriple=aarch64-eabi
 
 define i8 @test_minsize_uu8(i8 %x) minsize optsize {
 entry:
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index b2ca1cca0812..5081a9da3404 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -4,6 +4,9 @@
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a73 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m1 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m2 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m3 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=falkor -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
diff --git a/test/CodeGen/AArch64/rm_redundant_cmp.ll b/test/CodeGen/AArch64/rm_redundant_cmp.ll
index f66af7fd6270..22d0584f63b0 100644
--- a/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -11,9 +11,9 @@
 define void @test_i16_2cmp_signed_1() {
 ; CHECK-LABEL: test_i16_2cmp_signed_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
   %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
   %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
@@ -39,7 +39,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i16_2cmp_signed_2() {
 ; CHECK-LABEL: test_i16_2cmp_signed_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
@@ -67,9 +67,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i16_2cmp_unsigned_1() {
 ; CHECK-LABEL: test_i16_2cmp_unsigned_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
   %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
   %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
@@ -95,7 +95,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i16_2cmp_unsigned_2() {
 ; CHECK-LABEL: test_i16_2cmp_unsigned_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
 ; CHECK-NOT: cmp
 ; CHECK: b.hs
 entry:
@@ -132,9 +132,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_signed_1() {
 ; CHECK-LABEL: test_i8_2cmp_signed_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
   %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
   %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
@@ -160,7 +160,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_signed_2() {
 ; CHECK-LABEL: test_i8_2cmp_signed_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
@@ -188,9 +188,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_unsigned_1() {
 ; CHECK-LABEL: test_i8_2cmp_unsigned_1
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
 ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
 entry:
   %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
   %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
@@ -216,7 +216,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
 define void @test_i8_2cmp_unsigned_2() {
 ; CHECK-LABEL: test_i8_2cmp_unsigned_2
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
 ; CHECK-NOT: cmp
 ; CHECK: b.hs
 entry:
diff --git a/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/test/CodeGen/AArch64/sched-past-vector-ldst.ll
new file mode 100644
index 000000000000..52333463c243
--- /dev/null
+++ b/test/CodeGen/AArch64/sched-past-vector-ldst.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mcpu=cortex-a53 -enable-post-misched=false -enable-aa-sched-mi | FileCheck %s
+
+; Check that the vector store intrinsic does not prevent fmla instructions from
+; being scheduled together.  Since the vector loads and stores generated from
+; the intrinsics do not alias each other, the store can be pushed past the load.
+; This allows fmla instructions to be scheduled together.
+
+
+; CHECK: fmla
+; CHECK-NEXT: fmla
+; CHECK-NEXT: fmla
+; CHECK-NEXT: fmla
+target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%Struct = type { i64*, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 }
+
+; Function Attrs: nounwind
+define linkonce_odr void @func(%Struct* nocapture %this) unnamed_addr #0 align 2 {
+entry:
+  %0 = insertelement <4 x float> undef, float undef, i32 0
+  %1 = insertelement <4 x float> %0, float undef, i32 1
+  %2 = insertelement <4 x float> %1, float undef, i32 2
+  %3 = insertelement <4 x float> %2, float undef, i32 3
+  %scevgep = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 8, i32 0
+  %struct_ptr = bitcast float* %scevgep to i8*
+  %vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr)
+  %ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
+  %fm1 = fmul <4 x float> %0, %ev1
+  %av1 = fadd <4 x float> %1, %fm1
+  %ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
+  %fm2 = fmul <4 x float> %2, %ev2
+  %av2 = fadd <4 x float> %3, %fm2
+  %scevgep2 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 8, i32 0
+  %struct_ptr2 = bitcast float* %scevgep2 to i8*
+  tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av2, <4 x float> %av1, i8* %struct_ptr2)
+  %scevgep3 = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 12, i32 0
+  %struct_ptr3 = bitcast float* %scevgep3 to i8*
+  %vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr3)
+  %ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
+  %fm3 = fmul <4 x float> %0, %ev3
+  %av3 = fadd <4 x float> %1, %fm3
+  %ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
+  %fm4 = fmul <4 x float> %2, %ev4
+  %av4 = fadd <4 x float> %3, %fm4
+  %scevgep4 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 12, i32 0
+  %struct_ptr4 = bitcast float* %scevgep4 to i8*
+  tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av4, <4 x float> %av3, i8* %struct_ptr4)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float>, <4 x float>, i8* nocapture) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readonly }
diff --git a/test/CodeGen/AArch64/scheduledag-constreg.mir b/test/CodeGen/AArch64/scheduledag-constreg.mir
new file mode 100644
index 000000000000..23c785504f01
--- /dev/null
+++ b/test/CodeGen/AArch64/scheduledag-constreg.mir
@@ -0,0 +1,29 @@
+# RUN: llc -o /dev/null %s -mtriple=aarch64-- -run-pass=machine-scheduler -enable-misched -debug-only=misched 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @func() { ret void }
+...
+---
+# Check that the instructions are not dependent on each other, even though
+# they all read/write to the zero register.
+# CHECK-LABEL: MI Scheduling
+# CHECK: SU(0): %WZR<def,dead> = SUBSWri %W1, 0, 0, %NZCV<imp-def,dead>
+# CHECK: # succs left : 0
+# CHECK-NOT: Successors:
+# CHECK: SU(1): %W2<def> = COPY %WZR
+# CHECK: # succs left : 0
+# CHECK-NOT: Successors:
+# CHECK: SU(2): %WZR<def,dead> = SUBSWri %W3, 0, 0, %NZCV<imp-def,dead>
+# CHECK: # succs left : 0
+# CHECK-NOT: Successors:
+# CHECK: SU(3): %W4<def> = COPY %WZR
+# CHECK: # succs left : 0
+# CHECK-NOT: Successors:
+name: func
+body: |
+  bb.0:
+    dead %wzr = SUBSWri %w1, 0, 0, implicit-def dead %nzcv
+    %w2 = COPY %wzr
+    dead %wzr = SUBSWri %w3, 0, 0, implicit-def dead %nzcv
+    %w4 = COPY %wzr
+...
diff --git a/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/test/CodeGen/AArch64/selectcc-to-shiftand.ll
new file mode 100644
index 000000000000..0d89cdedfa8a
--- /dev/null
+++ b/test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+; Compare if negative and select of constants where one constant is zero.
+
+define i32 @neg_sel_constants(i32 %a) {
+; CHECK-LABEL: neg_sel_constants:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    mov w8, #5
+; CHECK-NEXT:    and w0, w8, w0, asr #31
+; CHECK-NEXT:    ret
+;
+  %tmp.1 = icmp slt i32 %a, 0
+  %retval = select i1 %tmp.1, i32 5, i32 0
+  ret i32 %retval
+}
+
+; Compare if negative and select of constants where one constant is zero and the other is a single bit.
+
+define i32 @neg_sel_special_constant(i32 %a) {
+; CHECK-LABEL: neg_sel_special_constant:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    lsr w8, w0, #22
+; CHECK-NEXT:    and w0, w8, #0x200
+; CHECK-NEXT:    ret
+;
+  %tmp.1 = icmp slt i32 %a, 0
+  %retval = select i1 %tmp.1, i32 512, i32 0
+  ret i32 %retval
+}
+
+; Compare if negative and select variable or zero.
+
+define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) {
+; CHECK-LABEL: neg_sel_variable_and_zero:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    and w0, w1, w0, asr #31
+; CHECK-NEXT:    ret
+;
+  %tmp.1 = icmp slt i32 %a, 0
+  %retval = select i1 %tmp.1, i32 %b, i32 0
+  ret i32 %retval
+}
+
+; Compare if not positive and select the same variable as being compared: smin(a, 0).
+
+define i32 @not_pos_sel_same_variable(i32 %a) {
+; CHECK-LABEL: not_pos_sel_same_variable:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    and w0, w0, w0, asr #31
+; CHECK-NEXT:    ret
+;
+  %tmp = icmp slt i32 %a, 1
+  %min = select i1 %tmp, i32 %a, i32 0
+  ret i32 %min
+}
+
+; Flipping the comparison condition can be handled by getting the bitwise not of the sign mask.
+
+; Compare if non-negative and select of constants where one constant is zero.
+
+define i32 @pos_sel_constants(i32 %a) {
+; CHECK-LABEL: pos_sel_constants:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    mov w8, #5
+; CHECK-NEXT:    bic w0, w8, w0, asr #31
+; CHECK-NEXT:    ret
+;
+  %tmp.1 = icmp sgt i32 %a, -1
+  %retval = select i1 %tmp.1, i32 5, i32 0
+  ret i32 %retval
+}
+
+; Compare if non-negative and select of constants where one constant is zero and the other is a single bit.
+
+define i32 @pos_sel_special_constant(i32 %a) {
+; CHECK-LABEL: pos_sel_special_constant:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    orr w8, wzr, #0x200
+; CHECK-NEXT:    bic w0, w8, w0, lsr #22
+; CHECK-NEXT:    ret
+;
+  %tmp.1 = icmp sgt i32 %a, -1
+  %retval = select i1 %tmp.1, i32 512, i32 0
+  ret i32 %retval
+}
+
+; Compare if non-negative and select variable or zero.
+
+define i32 @pos_sel_variable_and_zero(i32 %a, i32 %b) {
+; CHECK-LABEL: pos_sel_variable_and_zero:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    bic w0, w1, w0, asr #31
+; CHECK-NEXT:    ret
+;
+  %tmp.1 = icmp sgt i32 %a, -1
+  %retval = select i1 %tmp.1, i32 %b, i32 0
+  ret i32 %retval
+}
+
+; Compare if not negative or zero and select the same variable as being compared: smax(a, 0).
+
+define i32 @not_neg_sel_same_variable(i32 %a) {
+; CHECK-LABEL: not_neg_sel_same_variable:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    bic w0, w0, w0, asr #31
+; CHECK-NEXT:    ret
+;
+  %tmp = icmp sgt i32 %a, 0
+  %max = select i1 %tmp, i32 %a, i32 0 ; a > 0 ? a : 0, i.e. smax(a, 0)
+  ret i32 %max
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=31175
+
+; ret = (x-y) > 0 ? x-y : 0
+define i32 @PR31175(i32 %x, i32 %y) {
+; CHECK-LABEL: PR31175:
+; CHECK:       // BB#0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w8, w8, asr #31
+; CHECK-NEXT:    ret
+;
+  %sub = sub nsw i32 %x, %y
+  %cmp = icmp sgt i32 %sub, 0
+  %sel = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %sel
+}
+
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
index 925d1881f563..9a44b43d14e6 100644
--- a/test/CodeGen/AArch64/sibling-call.ll
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-load-store-opt=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-ldst-opt=0 | FileCheck %s
 
 declare void @callee_stack0()
 declare void @callee_stack8([8 x i32], i64)
diff --git a/test/CodeGen/AArch64/simple-macho.ll b/test/CodeGen/AArch64/simple-macho.ll
index e9dd98e230db..2c2ad8a7a698 100644
--- a/test/CodeGen/AArch64/simple-macho.ll
+++ b/test/CodeGen/AArch64/simple-macho.ll
@@ -9,4 +9,4 @@ define void @foo() {
 ; CHECK-OBJ: 0: c0 03 5f d6 ret
 
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/test/CodeGen/AArch64/sitofp-fixed-legal.ll
new file mode 100644
index 000000000000..f2a2642f470f
--- /dev/null
+++ b/test/CodeGen/AArch64/sitofp-fixed-legal.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s
+
+define <16 x double> @test_sitofp_fixed(<16 x i32> %in) {
+; CHECK-LABEL: test_sitofp_fixed:
+
+  ; First, extend each i32 to i64
+; CHECK-DAG: sshll2.2d [[BLOCK0_HI:v[0-9]+]], v0, #0
+; CHECK-DAG: sshll2.2d [[BLOCK1_HI:v[0-9]+]], v1, #0
+; CHECK-DAG: sshll2.2d [[BLOCK2_HI:v[0-9]+]], v2, #0
+; CHECK-DAG: sshll2.2d [[BLOCK3_HI:v[0-9]+]], v3, #0
+; CHECK-DAG: sshll.2d [[BLOCK0_LO:v[0-9]+]], v0, #0
+; CHECK-DAG: sshll.2d [[BLOCK1_LO:v[0-9]+]], v1, #0
+; CHECK-DAG: sshll.2d [[BLOCK2_LO:v[0-9]+]], v2, #0
+; CHECK-DAG: sshll.2d [[BLOCK3_LO:v[0-9]+]], v3, #0
+
+  ; Next, convert each to double.
+; CHECK-DAG: scvtf.2d v0, [[BLOCK0_LO]]
+; CHECK-DAG: scvtf.2d v1, [[BLOCK0_HI]]
+; CHECK-DAG: scvtf.2d v2, [[BLOCK1_LO]]
+; CHECK-DAG: scvtf.2d v3, [[BLOCK1_HI]]
+; CHECK-DAG: scvtf.2d v4, [[BLOCK2_LO]]
+; CHECK-DAG: scvtf.2d v5, [[BLOCK2_HI]]
+; CHECK-DAG: scvtf.2d v6, [[BLOCK3_LO]]
+; CHECK-DAG: scvtf.2d v7, [[BLOCK3_HI]]
+
+; CHECK: ret
+  %flt = sitofp <16 x i32> %in to <16 x double>
+  %res = fdiv <16 x double> %flt, <double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0>
+  ret <16 x double> %res
+}
+
+; This one is small enough to satisfy isSimple, but still illegally large.
+define <4 x double> @test_sitofp_fixed_shortish(<4 x i64> %in) {
+; CHECK-LABEL: test_sitofp_fixed_shortish:
+
+; CHECK-DAG: scvtf.2d v0, v0
+; CHECK-DAG: scvtf.2d v1, v1
+
+; CHECK: ret
+  %flt = sitofp <4 x i64> %in to <4 x double>
+  %res = fdiv <4 x double> %flt, <double 64.0, double 64.0, double 64.0, double 64.0>
+  ret <4 x double> %res
+}
diff --git a/test/CodeGen/AArch64/spill-fold.ll b/test/CodeGen/AArch64/spill-fold.ll
new file mode 100644
index 000000000000..d0055d71808d
--- /dev/null
+++ b/test/CodeGen/AArch64/spill-fold.ll
@@ -0,0 +1,78 @@
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+declare i32 @bar()
+declare i32 @baz()
+
+; Check that the spill of the zero value gets stored directly instead
+; of being copied from wzr and then stored.
+define i32 @test_zr_spill_fold1(i1 %c) {
+; CHECK-LABEL: test_zr_spill_fold1:
+entry:
+  br i1 %c, label %if.else, label %if.then
+
+if.else:
+; CHECK: bl bar
+; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
+  %call1 = tail call i32 @bar()
+  br label %if.end
+
+if.then:
+; CHECK: bl baz
+; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
+  %call2 = tail call i32 @baz()
+  br label %if.end
+
+if.end:
+  %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
+  ret i32 %x.0
+}
+
+; Similar to test_zr_spill_fold1, but with mis-matched register
+; class between %x.0 and the 0 from %if.then.
+define i32 @test_zr_spill_fold2(i1 %c) {
+; CHECK-LABEL: test_zr_spill_fold2:
+entry:
+  br i1 %c, label %if.else, label %if.then
+
+if.else:
+; CHECK: bl bar
+; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]]
+  %call1 = tail call i32 @bar()
+  br label %if.end
+
+if.then:
+; CHECK: bl baz
+; CHECK-NEXT: str wzr, [sp, #[[SLOT]]]
+  %call2 = tail call i32 @baz()
+  br label %if.end
+
+if.end:
+  %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ]
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind
+  %x.1 = add i32 %x.0, 1
+  ret i32 %x.1
+}
+
+; Similar to test_zr_spill_fold1, but with a cross register-class copy feeding a spill store.
+define float @test_cross_spill_fold(i32 %v) {
+; CHECK-LABEL: test_cross_spill_fold:
+entry:
+; CHECK: str w0, [sp, #[[SLOT:[0-9]+]]]
+  %v.f = bitcast i32 %v to float
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
+; CHECK: ldr s0, [sp, #[[SLOT]]]
+  ret float %v.f
+}
+
+; Similar to test_cross_spill_fold, but with a cross register-class copy fed by a refill load.
+define float @test_cross_spill_fold2(i32 %v) {
+; CHECK-LABEL: test_cross_spill_fold2:
+entry:
+; CHECK: str w0, [sp, #[[SLOT:[0-9]+]]]
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
+; CHECK: ldr s0, [sp, #[[SLOT]]]
+  %v.f = bitcast i32 %v to float
+  ret float %v.f
+}
+
diff --git a/test/CodeGen/AArch64/sqrt-fastmath.ll b/test/CodeGen/AArch64/sqrt-fastmath.ll
index 0d9533fd27fc..079562c05819 100644
--- a/test/CodeGen/AArch64/sqrt-fastmath.ll
+++ b/test/CodeGen/AArch64/sqrt-fastmath.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt | FileCheck %s --check-prefix=FAULT
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt   | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon,-use-reverse-square-root  | FileCheck %s --check-prefix=FAULT
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon,+use-reverse-square-root | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s
 
-declare float @llvm.sqrt.f32(float) #1
-declare double @llvm.sqrt.f64(double) #1
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #1
+declare float @llvm.sqrt.f32(float) #0
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
+declare double @llvm.sqrt.f64(double) #0
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0
 
 define float @fsqrt(float %a) #0 {
   %1 = tail call fast float @llvm.sqrt.f32(float %a)
@@ -19,12 +19,14 @@ define float @fsqrt(float %a) #0 {
 
 ; CHECK-LABEL: fsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
+; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
+; CHECK: fcmp s0, #0
 }
 
 define <2 x float> @f2sqrt(<2 x float> %a) #0 {
-  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
+  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
   ret <2 x float> %1
 
 ; FAULT-LABEL: f2sqrt:
@@ -33,13 +35,14 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 {
 
 ; CHECK-LABEL: f2sqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
 }
 
 define <4 x float> @f4sqrt(<4 x float> %a) #0 {
-  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
+  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
   ret <4 x float> %1
 
 ; FAULT-LABEL: f4sqrt:
@@ -48,9 +51,27 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 {
 
 ; CHECK-LABEL: f4sqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
+}
+
+define <8 x float> @f8sqrt(<8 x float> %a) #0 {
+  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
+  ret <8 x float> %1
+
+; FAULT-LABEL: f8sqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: f8sqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
 }
 
 define double @dsqrt(double %a) #0 {
@@ -63,12 +84,14 @@ define double @dsqrt(double %a) #0 {
 
 ; CHECK-LABEL: dsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
+; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
+; CHECK: fcmp d0, #0
 }
 
 define <2 x double> @d2sqrt(<2 x double> %a) #0 {
-  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
+  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
   ret <2 x double> %1
 
 ; FAULT-LABEL: d2sqrt:
@@ -77,9 +100,27 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 {
 
 ; CHECK-LABEL: d2sqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
+}
+
+define <4 x double> @d4sqrt(<4 x double> %a) #0 {
+  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+  ret <4 x double> %1
+
+; FAULT-LABEL: d4sqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: d4sqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
 }
 
 define float @frsqrt(float %a) #0 {
@@ -93,12 +134,14 @@ define float @frsqrt(float %a) #0 {
 
 ; CHECK-LABEL: frsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
+; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
+; CHECK-NOT: fcmp {{s[0-7]}}, #0
 }
 
 define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
-  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
+  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
   %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
   ret <2 x float> %2
 
@@ -108,12 +151,14 @@ define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
 
 ; CHECK-LABEL: f2rsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
 }
 
 define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
-  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
+  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
   %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
   ret <4 x float> %2
 
@@ -123,8 +168,28 @@ define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
 
 ; CHECK-LABEL: f4rsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
+}
+
+define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
+  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
+  %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
+  ret <8 x float> %2
+
+; FAULT-LABEL: f8rsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: f8rsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
+; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
 }
 
 define double @drsqrt(double %a) #0 {
@@ -138,12 +203,14 @@ define double @drsqrt(double %a) #0 {
 
 ; CHECK-LABEL: drsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
+; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
+; CHECK-NOT: fcmp d0, #0
 }
 
 define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
-  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
+  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
   %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
   ret <2 x double> %2
 
@@ -153,8 +220,28 @@ define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
 
 ; CHECK-LABEL: d2rsqrt:
 ; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
+}
+
+define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
+  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+  %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
+  ret <4 x double> %2
+
+; FAULT-LABEL: d4rsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: d4rsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
+; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
+; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
+; CHECK-NOT: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
 }
 
 attributes #0 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/AArch64/stackmap-liveness.ll b/test/CodeGen/AArch64/stackmap-liveness.ll
index 224a9c418526..4b04276ac226 100644
--- a/test/CodeGen/AArch64/stackmap-liveness.ll
+++ b/test/CodeGen/AArch64/stackmap-liveness.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:   __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 2
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -44,4 +44,3 @@ define i64 @stackmap_liveness(i1 %c) {
 }
 
 declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
-
diff --git a/test/CodeGen/AArch64/subs-to-sub-opt.ll b/test/CodeGen/AArch64/subs-to-sub-opt.ll
index f33e24e777fe..ce544d351c81 100644
--- a/test/CodeGen/AArch64/subs-to-sub-opt.ll
+++ b/test/CodeGen/AArch64/subs-to-sub-opt.ll
@@ -7,7 +7,7 @@
 define i32 @test01() nounwind {
 ; CHECK: ldrb {{.*}}
 ; CHECK-NEXT: ldrb {{.*}}
-; CHECK-NEXT: sub {{.*}}
+; CHECK: sub {{.*}}
 ; CHECK-NEXT: cmn {{.*}}
 entry:
   %0 = load i8, i8* @a, align 1
diff --git a/test/CodeGen/AArch64/swift-return.ll b/test/CodeGen/AArch64/swift-return.ll
new file mode 100644
index 000000000000..15c19ce36196
--- /dev/null
+++ b/test/CodeGen/AArch64/swift-return.ll
@@ -0,0 +1,296 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0
+
+; CHECK-LABEL: test1
+; CHECK: bl      _gen
+; CHECK: sxth    [[TMP:w.*]], w0
+; CHECK: add     w0, [[TMP]], w1, sxtb
+; CHECK-O0-LABEL: test1
+; CHECK-O0: bl      _gen
+; CHECK-O0: sxth    [[TMP:w.*]], w0
+; CHECK-O0: add     w0, [[TMP]], w1, sxtb
+define i16 @test1(i32) {
+entry:
+  %call = call swiftcc { i16, i8 } @gen(i32 %0)
+  %v3 = extractvalue { i16, i8 } %call, 0
+  %v1 = sext i16 %v3 to i32
+  %v5 = extractvalue { i16, i8 } %call, 1
+  %v2 = sext i8 %v5 to i32
+  %add = add nsw i32 %v1, %v2
+  %conv = trunc i32 %add to i16
+  ret i16 %conv
+}
+
+declare swiftcc { i16, i8 } @gen(i32)
+
+; CHECK-LABEL: test2
+; CHECK:  bl      _gen2
+; CHECK:  add     [[TMP:x.*]], x0, x1
+; CHECK:  add     [[TMP]], [[TMP]], x2
+; CHECK:  add     [[TMP]], [[TMP]], x3
+; CHECK:  add     x0, [[TMP]], x4
+; CHECK-O0-LABEL: test2
+; CHECK-O0:  bl      _gen2
+; CHECK-O0:  add     [[TMP:x.*]], x0, x1
+; CHECK-O0:  add     [[TMP]], [[TMP]], x2
+; CHECK-O0:  add     [[TMP]], [[TMP]], x3
+; CHECK-O0:  add     x0, [[TMP]], x4
+
+define i64 @test2(i64 %key) {
+entry:
+  %key.addr = alloca i64, align 4
+  store i64 %key, i64* %key.addr, align 4
+  %0 = load i64, i64* %key.addr, align 4
+  %call = call swiftcc { i64, i64, i64, i64, i64 } @gen2(i64 %0)
+
+  %v3 = extractvalue { i64, i64, i64, i64, i64 } %call, 0
+  %v5 = extractvalue { i64, i64, i64, i64, i64 } %call, 1
+  %v6 = extractvalue { i64, i64, i64, i64, i64 } %call, 2
+  %v7 = extractvalue { i64, i64, i64, i64, i64 } %call, 3
+  %v8 = extractvalue { i64, i64, i64, i64, i64 } %call, 4
+
+  %add = add nsw i64 %v3, %v5
+  %add1 = add nsw i64 %add, %v6
+  %add2 = add nsw i64 %add1, %v7
+  %add3 = add nsw i64 %add2, %v8
+  ret i64 %add3
+}
+; CHECK-LABEL: gen2:
+; CHECK:  mov      x1, x0
+; CHECK:  mov      x2, x0
+; CHECK:  mov      x3, x0
+; CHECK:  mov      x4, x0
+; CHECK:  ret
+define swiftcc { i64, i64, i64, i64, i64 } @gen2(i64 %key) {
+  %Y = insertvalue { i64, i64, i64, i64, i64 } undef, i64 %key, 0
+  %Z = insertvalue { i64, i64, i64, i64, i64 } %Y, i64 %key, 1
+  %Z2 = insertvalue { i64, i64, i64, i64, i64 } %Z, i64 %key, 2
+  %Z3 = insertvalue { i64, i64, i64, i64, i64 } %Z2, i64 %key, 3
+  %Z4 = insertvalue { i64, i64, i64, i64, i64 } %Z3, i64 %key, 4
+  ret { i64, i64, i64, i64, i64 } %Z4
+}
+
+; CHECK-LABEL: test3
+; CHECK: bl      _gen3
+; CHECK: add             [[TMP:w.*]], w0, w1
+; CHECK: add             [[TMP]], [[TMP]], w2
+; CHECK: add             w0, [[TMP]], w3
+; CHECK-O0-LABEL: test3
+; CHECK-O0: bl      _gen3
+; CHECK-O0: add             [[TMP:w.*]], w0, w1
+; CHECK-O0: add             [[TMP]], [[TMP]], w2
+; CHECK-O0: add             w0, [[TMP]], w3
+define i32 @test3(i32) {
+entry:
+  %call = call swiftcc { i32, i32, i32, i32 } @gen3(i32 %0)
+
+  %v3 = extractvalue { i32, i32, i32, i32 } %call, 0
+  %v5 = extractvalue { i32, i32, i32, i32 } %call, 1
+  %v6 = extractvalue { i32, i32, i32, i32 } %call, 2
+  %v7 = extractvalue { i32, i32, i32, i32 } %call, 3
+
+  %add = add nsw i32 %v3, %v5
+  %add1 = add nsw i32 %add, %v6
+  %add2 = add nsw i32 %add1, %v7
+  ret i32 %add2
+}
+
+declare swiftcc { i32, i32, i32, i32 } @gen3(i32 %key)
+
+; CHECK-LABEL: test4
+; CHECK: bl      _gen4
+; CHECK: fadd    s0, s0, s1
+; CHECK: fadd    s0, s0, s2
+; CHECK: fadd    s0, s0, s3
+; CHECK-O0-LABEL: test4
+; CHECK-O0: bl      _gen4
+; CHECK-O0: fadd    s0, s0, s1
+; CHECK-O0: fadd    s0, s0, s2
+; CHECK-O0: fadd    s0, s0, s3
+define float @test4(float) {
+entry:
+  %call = call swiftcc { float, float, float, float } @gen4(float %0)
+
+  %v3 = extractvalue { float, float, float, float } %call, 0
+  %v5 = extractvalue { float, float, float, float } %call, 1
+  %v6 = extractvalue { float, float, float, float } %call, 2
+  %v7 = extractvalue { float, float, float, float } %call, 3
+
+  %add = fadd float %v3, %v5
+  %add1 = fadd float %add, %v6
+  %add2 = fadd float %add1, %v7
+  ret float %add2
+}
+
+declare swiftcc { float, float, float, float } @gen4(float %key)
+
+; CHECK-LABEL: test5
+; CHECK:  bl      _gen5
+; CHECK:  fadd    d0, d0, d1
+; CHECK:  fadd    d0, d0, d2
+; CHECK:  fadd    d0, d0, d3
+; CHECK-O0-LABEL: test5
+; CHECK-O0:  bl      _gen5
+; CHECK-O0:  fadd    d0, d0, d1
+; CHECK-O0:  fadd    d0, d0, d2
+; CHECK-O0:  fadd    d0, d0, d3
+define swiftcc double @test5(){
+entry:
+  %call = call swiftcc { double, double, double, double } @gen5()
+
+  %v3 = extractvalue { double, double, double, double } %call, 0
+  %v5 = extractvalue { double, double, double, double } %call, 1
+  %v6 = extractvalue { double, double, double, double } %call, 2
+  %v7 = extractvalue { double, double, double, double } %call, 3
+
+  %add = fadd double %v3, %v5
+  %add1 = fadd double %add, %v6
+  %add2 = fadd double %add1, %v7
+  ret double %add2
+}
+
+declare swiftcc { double, double, double, double } @gen5()
+
+; CHECK-LABEL: test6
+; CHECK:   bl      _gen6
+; CHECK-DAG:   fadd    d0, d0, d1
+; CHECK-DAG:   fadd    d0, d0, d2
+; CHECK-DAG:   fadd    d0, d0, d3
+; CHECK-DAG:   add     [[TMP:w.*]], w0, w1
+; CHECK-DAG:   add     [[TMP]], [[TMP]], w2
+; CHECK-DAG:   add     w0, [[TMP]], w3
+; CHECK-O0-LABEL: test6
+; CHECK-O0:   bl      _gen6
+; CHECK-O0-DAG:   fadd    d0, d0, d1
+; CHECK-O0-DAG:   fadd    d0, d0, d2
+; CHECK-O0-DAG:   fadd    d0, d0, d3
+; CHECK-O0-DAG:   add     [[TMP:w.*]], w0, w1
+; CHECK-O0-DAG:   add     [[TMP]], [[TMP]], w2
+; CHECK-O0-DAG:   add     w0, [[TMP]], w3
+define swiftcc { double, i32 } @test6() {
+entry:
+  %call = call swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6()
+
+  %v3 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 0
+  %v5 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 1
+  %v6 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 2
+  %v7 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 3
+  %v3.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 4
+  %v5.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 5
+  %v6.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 6
+  %v7.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 7
+
+  %add = fadd double %v3, %v5
+  %add1 = fadd double %add, %v6
+  %add2 = fadd double %add1, %v7
+
+  %add.i = add nsw i32 %v3.i, %v5.i
+  %add1.i = add nsw i32 %add.i, %v6.i
+  %add2.i = add nsw i32 %add1.i, %v7.i
+
+  %Y = insertvalue { double, i32 } undef, double %add2, 0
+  %Z = insertvalue { double, i32 } %Y, i32 %add2.i, 1
+  ret { double, i32} %Z
+}
+
+declare swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6()
+
+; CHECK-LABEL: _gen7
+; CHECK-DAG:   mov      w1, w0
+; CHECK-DAG:   mov      w2, w0
+; CHECK-DAG:   mov      w3, w0
+; CHECK:   ret
+; CHECK-O0-LABEL: _gen7
+; CHECK-O0:  str     w0, [sp, #12]
+; CHECK-O0:  ldr     w1, [sp, #12]
+; CHECK-O0:  ldr     w2, [sp, #12]
+; CHECK-O0:  ldr     w3, [sp, #12]
+define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) {
+  %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0
+  %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1
+  %v2 = insertvalue { i32, i32, i32, i32 } %v1, i32 %key, 2
+  %v3 = insertvalue { i32, i32, i32, i32 } %v2, i32 %key, 3
+  ret { i32, i32, i32, i32 } %v3
+}
+
+; CHECK-LABEL: _gen9
+; CHECK:  mov      w1, w0
+; CHECK:  mov      w2, w0
+; CHECK:  mov      w3, w0
+; CHECK:  ret
+; CHECK-O0-LABEL: _gen9
+; CHECK-O0:  str     w0, [sp, #12]
+; CHECK-O0:  ldr     w1, [sp, #12]
+; CHECK-O0:  ldr     w2, [sp, #12]
+; CHECK-O0:  ldr     w3, [sp, #12]
+define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) {
+  %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0
+  %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1
+  %v2 = insertvalue { i8, i8, i8, i8 } %v1, i8 %key, 2
+  %v3 = insertvalue { i8, i8, i8, i8 } %v2, i8 %key, 3
+  ret { i8, i8, i8, i8 } %v3
+}
+
+; CHECK-LABEL: _gen10
+; CHECK:  mov.16b         v1, v0
+; CHECK:  mov.16b         v2, v0
+; CHECK:  mov.16b         v3, v0
+; CHECK:  mov      w1, w0
+; CHECK:  mov      w2, w0
+; CHECK:  mov      w3, w0
+; CHECK:  ret
+define swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen10(double %keyd, i32 %keyi) {
+  %v0 = insertvalue { double, double, double, double, i32, i32, i32, i32 } undef, double %keyd, 0
+  %v1 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v0, double %keyd, 1
+  %v2 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v1, double %keyd, 2
+  %v3 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v2, double %keyd, 3
+  %v4 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v3, i32 %keyi, 4
+  %v5 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v4, i32 %keyi, 5
+  %v6 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v5, i32 %keyi, 6
+  %v7 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v6, i32 %keyi, 7
+  ret { double, double, double, double, i32, i32, i32, i32 } %v7
+}
+
+; CHECK-LABEL: _test11
+; CHECK:  bl      _gen11
+; CHECK:  fadd.4s v0, v0, v1
+; CHECK:  fadd.4s v0, v0, v2
+; CHECK:  fadd.4s v0, v0, v3
+define swiftcc <4 x float> @test11() {
+entry:
+  %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11()
+
+  %v3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 0
+  %v5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 1
+  %v6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 2
+  %v7 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 3
+
+  %add = fadd <4 x float> %v3, %v5
+  %add1 = fadd <4 x float> %add, %v6
+  %add2 = fadd <4 x float> %add1, %v7
+  ret <4 x float> %add2
+}
+
+declare swiftcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11()
+
+; CHECK-LABEL: _test12
+; CHECK:  fadd.4s v0, v0, v1
+; CHECK:  fadd.4s v0, v0, v2
+; CHECK:  mov.16b v1, v3
+define swiftcc { <4 x float>, float } @test12() #0 {
+entry:
+  %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12()
+
+  %v3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 0
+  %v5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 1
+  %v6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 2
+  %v8 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 3
+
+  %add = fadd <4 x float> %v3, %v5
+  %add1 = fadd <4 x float> %add, %v6
+  %res.0 = insertvalue { <4 x float>, float } undef, <4 x float> %add1, 0
+  %res = insertvalue { <4 x float>, float } %res.0, float %v8, 1
+  ret { <4 x float>, float } %res
+}
+
+declare swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12()
diff --git a/test/CodeGen/AArch64/swiftcc.ll b/test/CodeGen/AArch64/swiftcc.ll
new file mode 100644
index 000000000000..432495427152
--- /dev/null
+++ b/test/CodeGen/AArch64/swiftcc.ll
@@ -0,0 +1,11 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
+
+; CHECK-LABEL: _t1:
+; CHECK: fadd s0, s0, s1
+; CHECK: ret
+define swiftcc float @t1(float %a, float %b) {
+entry:
+  %add = fadd float %a, %b
+  ret float %add
+}
diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll
index a0bfffdef95e..b15eaa923f08 100644
--- a/test/CodeGen/AArch64/swifterror.ll
+++ b/test/CodeGen/AArch64/swifterror.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -disable-fp-elim < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-APPLE %s
+; RUN: llc -verify-machineinstrs -disable-fp-elim -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-APPLE %s
 ; RUN: llc -verify-machineinstrs -disable-fp-elim -O0 < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-O0 %s
 
 declare i8* @malloc(i64)
@@ -19,10 +19,11 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-O0-LABEL: foo:
 ; CHECK-O0: orr w{{.*}}, wzr, #0x10
 ; CHECK-O0: malloc
-; CHECK-O0: mov [[ID2:x[0-9]+]], x0
+; CHECK-O0: mov x19, x0
+; CHECK-O0-NOT: x19
 ; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-O0-NOT: x19
 ; CHECK-O0: strb [[ID]], [x0, #8]
-; CHECK-O0: mov x19, [[ID2]]
 ; CHECK-O0-NOT: x19
 entry:
   %call = call i8* @malloc(i64 16)
@@ -44,13 +45,13 @@ define float @caller(i8* %error_ref) {
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
 ; CHECK-APPLE: mov x0, x19
-; CHECK_APPLE: bl {{.*}}free
+; CHECK-APPLE: bl {{.*}}free
 
 ; CHECK-O0-LABEL: caller:
 ; CHECK-O0: mov x19
 ; CHECK-O0: bl {{.*}}foo
 ; CHECK-O0: mov [[ID:x[0-9]+]], x19
-; CHECK-O0: cbnz [[ID]]
+; CHECK-O0: cbnz x19
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -83,13 +84,13 @@ define float @caller2(i8* %error_ref) {
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
 ; CHECK-APPLE: mov x0, x19
-; CHECK_APPLE: bl {{.*}}free
+; CHECK-APPLE: bl {{.*}}free
 
 ; CHECK-O0-LABEL: caller2:
 ; CHECK-O0: mov x19
 ; CHECK-O0: bl {{.*}}foo
 ; CHECK-O0: mov [[ID:x[0-9]+]], x19
-; CHECK-O0: cbnz [[ID]]
+; CHECK-O0: cbnz x19
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -128,7 +129,7 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 
 ; CHECK-O0-LABEL: foo_if:
 ; spill x19
-; CHECK-O0: str x19
+; CHECK-O0: str x19, [sp, [[SLOT:#[0-9]+]]]
 ; CHECK-O0: cbz w0
 ; CHECK-O0: orr w{{.*}}, wzr, #0x10
 ; CHECK-O0: malloc
@@ -138,7 +139,8 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-O0: mov x19, [[ID]]
 ; CHECK-O0: ret
 ; reload from stack
-; CHECK-O0: ldr x19
+; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]]
+; CHECK-O0: mov x19, [[ID3]]
 ; CHECK-O0: ret
 entry:
   %cond = icmp ne i32 %cc, 0
@@ -172,18 +174,26 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
 
 ; CHECK-O0-LABEL: foo_loop:
 ; spill x19
-; CHECK-O0: str x19
-; CHECk-O0: cbz
+; CHECK-O0: str x19, [sp, [[SLOT:#[0-9]+]]]
+; CHECK-O0: b [[BB1:[A-Za-z0-9_]*]]
+; CHECK-O0: [[BB1]]:
+; CHECK-O0: ldr     x0, [sp, [[SLOT]]]
+; CHECK-O0: str     x0, [sp, [[SLOT2:#[0-9]+]]]
+; CHECK-O0: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
 ; CHECK-O0: orr w{{.*}}, wzr, #0x10
 ; CHECK-O0: malloc
 ; CHECK-O0: mov [[ID:x[0-9]+]], x0
 ; CHECK-O0: strb w{{.*}}, [{{.*}}[[ID]], #8]
 ; spill x0
-; CHECK-O0: str x0
+; CHECK-O0: str x0, [sp, [[SLOT2]]]
+; CHECK-O0:[[BB2]]:
+; CHECK-O0: ldr     x0, [sp, [[SLOT2]]]
 ; CHECK-O0: fcmp
-; CHECK-O0: b.le
+; CHECK-O0: str     x0, [sp]
+; CHECK-O0: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0: ldr x19
+; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp]
+; CHECK-O0: mov x19, [[ID3]]
 ; CHECK-O0: ret
 entry:
   br label %bb_loop
@@ -258,7 +268,7 @@ define float @caller3(i8* %error_ref) {
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
 ; CHECK-APPLE: mov x0, x19
-; CHECK_APPLE: bl {{.*}}free
+; CHECK-APPLE: bl {{.*}}free
 
 ; CHECK-O0-LABEL: caller3:
 ; spill x0
@@ -272,7 +282,7 @@ define float @caller3(i8* %error_ref) {
 ; CHECK-O0: ldrb [[CODE:w[0-9]+]]
 ; CHECK-O0: ldr [[ID:x[0-9]+]]
 ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK_O0: bl {{.*}}free
+; CHECK-O0: bl {{.*}}free
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -353,7 +363,7 @@ define float @caller4(i8* %error_ref) {
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
 ; CHECK-APPLE: mov x0, x19
-; CHECK_APPLE: bl {{.*}}free
+; CHECK-APPLE: bl {{.*}}free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -383,3 +393,193 @@ handler:
   call void @free(i8* %tmp)
   ret float 1.0
 }
+
+; Check that we don't blow up on tail calling swifterror argument functions.
+define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
+  ret float %0
+}
+define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
+  ret float %0
+}
+
+; CHECK-APPLE-LABEL: swifterror_clobber
+; CHECK-APPLE: mov [[REG:x[0-9]+]], x19
+; CHECK-APPLE: nop
+; CHECK-APPLE: mov x19, [[REG]]
+define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
+  call void asm sideeffect "nop", "~{x19}"()
+  ret void
+}
+
+; CHECK-APPLE-LABEL: swifterror_reg_clobber
+; CHECK-APPLE: stp {{.*}}x19
+; CHECK-APPLE: nop
+; CHECK-APPLE: ldp  {{.*}}x19
+define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
+  call void asm sideeffect "nop", "~{x19}"()
+  ret void
+}
+; CHECK-APPLE-LABEL: params_in_reg
+; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2.
+; CHECK-APPLE:  stp     x19, x28, [sp
+; CHECK-APPLE:  stp     x27, x26, [sp
+; CHECK-APPLE:  stp     x25, x24, [sp
+; CHECK-APPLE:  stp     x23, x22, [sp
+; CHECK-APPLE:  stp     x21, x20, [sp
+; CHECK-APPLE:  stp     x29, x30, [sp
+; CHECK-APPLE:  str     x20, [sp
+; Store argument registers.
+; CHECK-APPLE:  mov      x23, x7
+; CHECK-APPLE:  mov      x24, x6
+; CHECK-APPLE:  mov      x25, x5
+; CHECK-APPLE:  mov      x26, x4
+; CHECK-APPLE:  mov      x27, x3
+; CHECK-APPLE:  mov      x28, x2
+; CHECK-APPLE:  mov      x21, x1
+; CHECK-APPLE:  mov      x22, x0
+; Setup call.
+; CHECK-APPLE:  orr     w0, wzr, #0x1
+; CHECK-APPLE:  orr     w1, wzr, #0x2
+; CHECK-APPLE:  orr     w2, wzr, #0x3
+; CHECK-APPLE:  orr     w3, wzr, #0x4
+; CHECK-APPLE:  mov     w4, #5
+; CHECK-APPLE:  orr     w5, wzr, #0x6
+; CHECK-APPLE:  orr     w6, wzr, #0x7
+; CHECK-APPLE:  orr     w7, wzr, #0x8
+; CHECK-APPLE:  mov      x20, xzr
+; CHECK-APPLE:  mov      x19, xzr
+; CHECK-APPLE:  bl      _params_in_reg2
+; Restore original arguments for next call.
+; CHECK-APPLE:  mov      x0, x22
+; CHECK-APPLE:  mov      x1, x21
+; CHECK-APPLE:  mov      x2, x28
+; CHECK-APPLE:  mov      x3, x27
+; CHECK-APPLE:  mov      x4, x26
+; CHECK-APPLE:  mov      x5, x25
+; CHECK-APPLE:  mov      x6, x24
+; CHECK-APPLE:  mov      x7, x23
+; Restore original swiftself argument and swifterror %err.
+; CHECK-APPLE:  ldp             x20, x19, [sp
+; CHECK-APPLE:  bl      _params_in_reg2
+; Restore callee saved registers but don't clobber swifterror x19.
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ldp     x29, x30, [sp
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ldp     x21, x20, [sp
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ldp     x23, x22, [sp
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ldp     x25, x24, [sp
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ldp     x27, x26, [sp
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ldr     x28, [sp
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE:  ret
+define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+  %error_ptr_ref = alloca swifterror %swift_error*, align 8
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
+  call swiftcc void @params_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i8* swiftself %8, %swift_error** nocapture swifterror %err)
+  ret void
+}
+declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
+
+; CHECK-APPLE-LABEL: params_and_return_in_reg
+; Store callee saved registers.
+; CHECK-APPLE:  stp     x20, x28, [sp, #24
+; CHECK-APPLE:  stp     x27, x26, [sp
+; CHECK-APPLE:  stp     x25, x24, [sp
+; CHECK-APPLE:  stp     x23, x22, [sp
+; CHECK-APPLE:  stp     x21, x20, [sp
+; CHECK-APPLE:  stp     x29, x30, [sp
+; Save original arguments.
+; CHECK-APPLE:  mov      x23, x19
+; CHECK-APPLE:  str     x7, [sp, #16]
+; CHECK-APPLE:  mov      x24, x6
+; CHECK-APPLE:  mov      x25, x5
+; CHECK-APPLE:  mov      x26, x4
+; CHECK-APPLE:  mov      x27, x3
+; CHECK-APPLE:  mov      x28, x2
+; CHECK-APPLE:  mov      x21, x1
+; CHECK-APPLE:  mov      x22, x0
+; Setup call arguments.
+; CHECK-APPLE:  orr     w0, wzr, #0x1
+; CHECK-APPLE:  orr     w1, wzr, #0x2
+; CHECK-APPLE:  orr     w2, wzr, #0x3
+; CHECK-APPLE:  orr     w3, wzr, #0x4
+; CHECK-APPLE:  mov     w4, #5
+; CHECK-APPLE:  orr     w5, wzr, #0x6
+; CHECK-APPLE:  orr     w6, wzr, #0x7
+; CHECK-APPLE:  orr     w7, wzr, #0x8
+; CHECK-APPLE:  mov      x20, xzr
+; CHECK-APPLE:  mov      x19, xzr
+; CHECK-APPLE:  bl      _params_in_reg2
+; Store swifterror %error_ptr_ref.
+; CHECK-APPLE:  str     x19, [sp, #8]
+; Setup call arguments from original arguments.
+; CHECK-APPLE:  mov      x0, x22
+; CHECK-APPLE:  mov      x1, x21
+; CHECK-APPLE:  mov      x2, x28
+; CHECK-APPLE:  mov      x3, x27
+; CHECK-APPLE:  mov      x4, x26
+; CHECK-APPLE:  mov      x5, x25
+; CHECK-APPLE:  mov      x6, x24
+; CHECK-APPLE:  ldp     x7, x20, [sp, #16]
+; CHECK-APPLE:  mov      x19, x23
+; CHECK-APPLE:  bl      _params_and_return_in_reg2
+; Store return values.
+; CHECK-APPLE:  mov      x21, x0
+; CHECK-APPLE:  mov      x22, x1
+; CHECK-APPLE:  mov      x24, x2
+; CHECK-APPLE:  mov      x25, x3
+; CHECK-APPLE:  mov      x26, x4
+; CHECK-APPLE:  mov      x27, x5
+; CHECK-APPLE:  mov      x28, x6
+; CHECK-APPLE:  mov      x23, x7
+; Save swifterror %err.
+; CHECK-APPLE:  str     x19, [sp, #24]
+; Setup call.
+; CHECK-APPLE:  orr     w0, wzr, #0x1
+; CHECK-APPLE:  orr     w1, wzr, #0x2
+; CHECK-APPLE:  orr     w2, wzr, #0x3
+; CHECK-APPLE:  orr     w3, wzr, #0x4
+; CHECK-APPLE:  mov     w4, #5
+; CHECK-APPLE:  orr     w5, wzr, #0x6
+; CHECK-APPLE:  orr     w6, wzr, #0x7
+; CHECK-APPLE:  orr     w7, wzr, #0x8
+; CHECK-APPLE:  mov      x20, xzr
+; ... setup call with swifterror %error_ptr_ref.
+; CHECK-APPLE:  ldr     x19, [sp, #8]
+; CHECK-APPLE:  bl      _params_in_reg2
+; Restore return values for return from this function.
+; CHECK-APPLE:  mov      x0, x21
+; CHECK-APPLE:  mov      x1, x22
+; CHECK-APPLE:  mov      x2, x24
+; CHECK-APPLE:  mov      x3, x25
+; CHECK-APPLE:  mov      x4, x26
+; CHECK-APPLE:  mov      x5, x27
+; CHECK-APPLE:  mov      x6, x28
+; CHECK-APPLE:  mov      x7, x23
+; Restore swifterror %err and callee save registers.
+; CHECK-APPLE:  ldp     x19, x28, [sp, #24
+; CHECK-APPLE:  ldp     x29, x30, [sp
+; CHECK-APPLE:  ldp     x21, x20, [sp
+; CHECK-APPLE:  ldp     x23, x22, [sp
+; CHECK-APPLE:  ldp     x25, x24, [sp
+; CHECK-APPLE:  ldp     x27, x26, [sp
+; CHECK-APPLE:  ret
+define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+  %error_ptr_ref = alloca swifterror %swift_error*, align 8
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
+  %val = call swiftcc  { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i8* swiftself %8, %swift_error** nocapture swifterror %err)
+  call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
+  ret { i64, i64, i64, i64, i64, i64, i64, i64 } %val
+}
+
+declare swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
diff --git a/test/CodeGen/AArch64/tail-dup-repeat-worklist.ll b/test/CodeGen/AArch64/tail-dup-repeat-worklist.ll
new file mode 100644
index 000000000000..c2997c50f4d4
--- /dev/null
+++ b/test/CodeGen/AArch64/tail-dup-repeat-worklist.ll
@@ -0,0 +1,69 @@
+; RUN: llc -O3 -o - -verify-machineinstrs %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%struct.s1 = type { %struct.s3*, %struct.s1* }
+%struct.s2 = type opaque
+%struct.s3 = type { i32 }
+
+; Function Attrs: nounwind
+define internal fastcc i32 @repeated_dup_worklist(%struct.s1** %pp1, %struct.s2* %p2, i32 %state, i1 %i1_1, i32 %i32_1) unnamed_addr #0 {
+entry:
+  br label %while.cond.outer
+
+; The loop gets laid out:
+; %while.cond.outer
+; %(null)
+; %(null)
+; %dup2
+; and then %dup1 gets chosen as the next block.
+; When dup2 is duplicated into dup1, %worklist could erroneously be placed on
+; the worklist, because all of its current predecessors are now scheduled.
+; However, after dup2 is tail-duplicated, %worklist can't be on the worklist
+; because it now has unscheduled predecessors.
+; CHECK-LABEL: repeated_dup_worklist
+; CHECK: // %entry
+; CHECK: // %while.cond.outer
+; first %(null) block
+; CHECK: // in Loop:
+; CHECK: ldr
+; CHECK-NEXT: tbnz
+; second %(null) block
+; CHECK: // in Loop:
+; CHECK: // %dup2
+; CHECK: // %worklist
+; CHECK: // %if.then96.i
+while.cond.outer:                                 ; preds = %dup1, %entry
+  %progress.0.ph = phi i32 [ 0, %entry ], [ %progress.1, %dup1 ]
+  %inc77 = add nsw i32 %progress.0.ph, 1
+  %cmp = icmp slt i32 %progress.0.ph, %i32_1
+  br i1 %cmp, label %dup2, label %dup1
+
+dup2:                       ; preds = %if.then96.i, %worklist, %while.cond.outer
+  %progress.1.ph = phi i32 [ 0, %while.cond.outer ], [ %progress.1, %if.then96.i ], [ %progress.1, %worklist ]
+  %.pr = load %struct.s1*, %struct.s1** %pp1, align 8
+  br label %dup1
+
+dup1:                                       ; preds = %dup2, %while.cond.outer
+  %0 = phi %struct.s1* [ %.pr, %dup2 ], [ undef, %while.cond.outer ]
+  %progress.1 = phi i32 [ %progress.1.ph, %dup2 ], [ %inc77, %while.cond.outer ]
+  br i1 %i1_1, label %while.cond.outer, label %worklist
+
+worklist:                                       ; preds = %dup1
+  %snode94 = getelementptr inbounds %struct.s1, %struct.s1* %0, i64 0, i32 0
+  %1 = load %struct.s3*, %struct.s3** %snode94, align 8
+  %2 = getelementptr inbounds %struct.s3, %struct.s3* %1, i32 0, i32 0
+  %3 = load i32, i32* %2, align 4
+  %tobool95.i = icmp eq i32 %3, 0
+  br i1 %tobool95.i, label %if.then96.i, label %dup2
+
+if.then96.i:                                      ; preds = %worklist
+  call fastcc void @free_s3(%struct.s2* %p2, %struct.s3* %1) #1
+  br label %dup2
+}
+
+; Function Attrs: nounwind
+declare fastcc void @free_s3(%struct.s2*, %struct.s3*) unnamed_addr #0
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
index bcc8af8d0690..c15793361896 100644
--- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-enable-ldst-opt=false -asm-verbose=false -disable-post-ra | FileCheck %s
 ; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
index 3955877b09b7..10c4ba4c31d6 100644
--- a/test/CodeGen/AArch64/tailcall-implicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -disable-post-ra -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-enable-ldst-opt=false -disable-post-ra -asm-verbose=false | FileCheck %s
 ; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll
index 59a3be905f17..0e4eb2b5fad9 100644
--- a/test/CodeGen/AArch64/tailcall_misched_graph.ll
+++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll
@@ -37,8 +37,8 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*,
 ; CHECK: SU({{.*}}):   [[VRB]]<def> = LDRXui <fi#-2>
 ; CHECK-NOT: SU
 ; CHECK:  Successors:
-; CHECK:   ch  SU([[DEPSTOREB:.*]]): Latency=0
-; CHECK:   ch  SU([[DEPSTOREA:.*]]): Latency=0
+; CHECK:   ord  SU([[DEPSTOREB:.*]]): Latency=0
+; CHECK:   ord  SU([[DEPSTOREA:.*]]): Latency=0
 
 ; CHECK: SU([[DEPSTOREA]]):   STRXui %vreg{{.*}}, <fi#-4>
 ; CHECK: SU([[DEPSTOREB]]):   STRXui %vreg{{.*}}, <fi#-3>
diff --git a/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/test/CodeGen/AArch64/tailmerging_in_mbp.ll
index d850801ee54a..eab296d6a640 100644
--- a/test/CodeGen/AArch64/tailmerging_in_mbp.ll
+++ b/test/CodeGen/AArch64/tailmerging_in_mbp.ll
@@ -1,4 +1,4 @@
-; RUN: llc <%s -march=aarch64 -verify-machine-dom-info | FileCheck %s
+; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s
 
 ; CHECK-LABEL: test:
 ; CHECK:       LBB0_7:
diff --git a/test/CodeGen/AArch64/tbz-tbnz.ll b/test/CodeGen/AArch64/tbz-tbnz.ll
index 2099333950ea..0dd265c18ec7 100644
--- a/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -march=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -O1 -mtriple=aarch64-eabi | FileCheck %s
 
 declare void @t()
 
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
index 345c4d9ba95a..2472bf45b6a9 100644
--- a/test/CodeGen/AArch64/tst-br.ll
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
 
 ; We've got the usual issues with LLVM reordering blocks here. The
 ; tests are correct for the current order, but who knows when that
diff --git a/test/CodeGen/AArch64/xray-attribute-instrumentation.ll b/test/CodeGen/AArch64/xray-attribute-instrumentation.ll
new file mode 100644
index 000000000000..d0f5f40e156c
--- /dev/null
+++ b/test/CodeGen/AArch64/xray-attribute-instrumentation.ll
@@ -0,0 +1,32 @@
+; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: Lxray_sled_0:
+; CHECK-NEXT:  b  #32
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-LABEL: Ltmp0:
+  ret i32 0
+; CHECK-LABEL: Lxray_sled_1:
+; CHECK-NEXT:  b  #32
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-LABEL: Ltmp1:
+; CHECK-NEXT:  ret
+}
+; CHECK:       .p2align 4
+; CHECK-NEXT:  .xword .Lxray_synthetic_0
+; CHECK-NEXT:  .section xray_instr_map,{{.*}}
+; CHECK-LABEL: Lxray_synthetic_0:
+; CHECK:       .xword .Lxray_sled_0
+; CHECK:       .xword .Lxray_sled_1
author	Dimitry Andric <dim@FreeBSD.org>	2017-01-02 19:17:04 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-01-02 19:17:04 +0000
commit	b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch)
tree	98b8f811c7aff2547cab8642daf372d6c59502fb /test/CodeGen/AArch64
parent	6421cca32f69ac849537a3cff78c352195e99f1b (diff)