18 files changed, 1597 insertions, 421 deletions
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 66d9033a6d7cb..21c774133f896 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -12,6 +12,15 @@
   define void @test_fadd_s32() #0 { ret void }
   define void @test_fadd_s64() #0 { ret void }
 
+  define void @test_sub_s8() { ret void }
+  define void @test_sub_s16() { ret void }
+  define void @test_sub_s32() { ret void }
+
+  define void @test_mul_s8() #1 { ret void }
+  define void @test_mul_s16() #1 { ret void }
+  define void @test_mul_s32() #1 { ret void }
+  define void @test_mulv5_s32() { ret void }
+
   define void @test_load_from_stack() { ret void }
   define void @test_load_f32() #0 { ret void }
   define void @test_load_f64() #0 { ret void }
@@ -24,6 +33,7 @@
   define void @test_soft_fp_double() #0 { ret void }
 
   attributes #0 = { "target-features"="+vfp2,-neonfp" }
+  attributes #1 = { "target-features"="+v6" }
 ...
 ---
 name:            test_zext_s1
@@ -297,6 +307,237 @@ body:             |
     ; CHECK: BX_RET 14, _, implicit %d0
 ...
 ---
+name:            test_sub_s8
+# CHECK-LABEL: name: test_sub_s8
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gpr
+# CHECK-DAG: id: 1, class: gpr
+# CHECK-DAG: id: 2, class: gpr
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s8) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s8) = G_SUB %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s8)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_sub_s16
+# CHECK-LABEL: name: test_sub_s16
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gpr
+# CHECK-DAG: id: 1, class: gpr
+# CHECK-DAG: id: 2, class: gpr
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s16) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s16) = G_SUB %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s16)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_sub_s32
+# CHECK-LABEL: name: test_sub_s32
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s32) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s32) = G_SUB %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s32)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mul_s8
+# CHECK-LABEL: name: test_mul_s8
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gprnopc
+# CHECK-DAG: id: 1, class: gprnopc
+# CHECK-DAG: id: 2, class: gprnopc
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s8) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s8) = G_MUL %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s8)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mul_s16
+# CHECK-LABEL: name: test_mul_s16
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK-DAG: id: 0, class: gprnopc
+# CHECK-DAG: id: 1, class: gprnopc
+# CHECK-DAG: id: 2, class: gprnopc
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s16) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s16) = G_MUL %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s16)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mul_s32
+# CHECK-LABEL: name: test_mul_s32
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK: id: 0, class: gprnopc
+# CHECK: id: 1, class: gprnopc
+# CHECK: id: 2, class: gprnopc
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s32) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s32) = G_MUL %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s32)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mulv5_s32
+# CHECK-LABEL: name: test_mulv5_s32
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK: id: 0, class: gprnopc
+# CHECK: id: 1, class: gprnopc
+# CHECK: id: 2, class: gprnopc
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s32) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s32) = G_MUL %0, %1
+    ; CHECK: early-clobber [[VREGRES:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _
+
+    %r0 = COPY %2(s32)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
 name:            test_load_from_stack
 # CHECK-LABEL: name: test_load_from_stack
 legalized:       true
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index a7f5ec33bee3c..cf77ce352074d 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -35,6 +35,19 @@ entry:
   ret i8 %sum
 }
 
+define i8 @test_sub_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: name: test_sub_i8
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s8) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s8) = COPY %r1
+; CHECK: [[RES:%[0-9]+]](s8) = G_SUB [[VREGX]], [[VREGY]]
+; CHECK: %r0 = COPY [[RES]](s8)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  %res = sub i8 %x, %y
+  ret i8 %res
+}
+
 define signext i8 @test_return_sext_i8(i8 %x) {
 ; CHECK-LABEL: name: test_return_sext_i8
 ; CHECK: liveins: %r0
@@ -59,6 +72,19 @@ entry:
   ret i16 %sum
 }
 
+define i16 @test_sub_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: name: test_sub_i16
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s16) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s16) = COPY %r1
+; CHECK: [[RES:%[0-9]+]](s16) = G_SUB [[VREGX]], [[VREGY]]
+; CHECK: %r0 = COPY [[RES]](s16)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  %res = sub i16 %x, %y
+  ret i16 %res
+}
+
 define zeroext i16 @test_return_zext_i16(i16 %x) {
 ; CHECK-LABEL: name: test_return_zext_i16
 ; CHECK: liveins: %r0
@@ -83,6 +109,19 @@ entry:
   ret i32 %sum
 }
 
+define i32 @test_sub_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: name: test_sub_i32
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[RES:%[0-9]+]](s32) = G_SUB [[VREGX]], [[VREGY]]
+; CHECK: %r0 = COPY [[RES]](s32)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  %res = sub i32 %x, %y
+  ret i32 %res
+}
+
 define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
 ; CHECK-LABEL: name: test_stack_args
 ; CHECK: fixedStack:
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index 236dcbeb84c52..f3ca2915f306e 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel %s -o - | FileCheck %s
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v6 -global-isel %s -o - | FileCheck %s
 
 define void @test_void_return() {
 ; CHECK-LABEL: test_void_return:
@@ -67,6 +67,60 @@ entry:
   ret i32 %sum
 }
 
+define i8 @test_sub_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_sub_i8:
+; CHECK: sub r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = sub i8 %x, %y
+  ret i8 %sum
+}
+
+define i16 @test_sub_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_sub_i16:
+; CHECK: sub r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = sub i16 %x, %y
+  ret i16 %sum
+}
+
+define i32 @test_sub_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_sub_i32:
+; CHECK: sub r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = sub i32 %x, %y
+  ret i32 %sum
+}
+
+define i8 @test_mul_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_mul_i8:
+; CHECK: mul r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = mul i8 %x, %y
+  ret i8 %sum
+}
+
+define i16 @test_mul_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_mul_i16:
+; CHECK: mul r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = mul i16 %x, %y
+  ret i16 %sum
+}
+
+define i32 @test_mul_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_mul_i32:
+; CHECK: mul r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = mul i32 %x, %y
+  ret i32 %sum
+}
+
 define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
 ; CHECK-LABEL: test_stack_args_i32:
 ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
index cbff7e12fb77c..625d35acf17b9 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
@@ -7,6 +7,14 @@
   define void @test_add_s16() { ret void }
   define void @test_add_s32() { ret void }
 
+  define void @test_sub_s8() { ret void }
+  define void @test_sub_s16() { ret void }
+  define void @test_sub_s32() { ret void }
+
+  define void @test_mul_s8() { ret void }
+  define void @test_mul_s16() { ret void }
+  define void @test_mul_s32() { ret void }
+
   define void @test_load_from_stack() { ret void }
   define void @test_legal_loads() #0 { ret void }
   define void @test_legal_stores() #0 { ret void }
@@ -139,6 +147,154 @@ body:             |
 
 ...
 ---
+name:            test_sub_s8
+# CHECK-LABEL: name: test_sub_s8
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_SUB %0, %1
+    ; G_SUB with s8 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s8) = G_SUB {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+...
+---
+name:            test_sub_s16
+# CHECK-LABEL: name: test_sub_s16
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_SUB %0, %1
+    ; G_SUB with s16 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s16) = G_SUB {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_sub_s32
+# CHECK-LABEL: name: test_sub_s32
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_SUB %0, %1
+    ; G_SUB with s32 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_mul_s8
+# CHECK-LABEL: name: test_mul_s8
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_MUL %0, %1
+    ; G_MUL with s8 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s8) = G_MUL {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mul_s16
+# CHECK-LABEL: name: test_mul_s16
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_MUL %0, %1
+    ; G_MUL with s16 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s16) = G_MUL {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_mul_s32
+# CHECK-LABEL: name: test_mul_s32
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_MUL %0, %1
+    ; G_MUL with s32 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
 name:            test_load_from_stack
 # CHECK-LABEL: name: test_load_from_stack
 legalized:       false
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
index fbf8d81322f8f..e7935832f98a8 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
@@ -5,6 +5,14 @@
   define void @test_add_s8() { ret void }
   define void @test_add_s1() { ret void }
 
+  define void @test_sub_s32() { ret void }
+  define void @test_sub_s16() { ret void }
+  define void @test_sub_s8() { ret void }
+
+  define void @test_mul_s32() { ret void }
+  define void @test_mul_s16() { ret void }
+  define void @test_mul_s8() { ret void }
+
   define void @test_loads() #0 { ret void }
   define void @test_stores() #0 { ret void }
 
@@ -126,6 +134,162 @@ body:             |
 
 ...
 ---
+name:            test_sub_s32
+# CHECK-LABEL: name: test_sub_s32
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_SUB %0, %1
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_sub_s16
+# CHECK-LABEL: name: test_sub_s16
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_SUB %0, %1
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_sub_s8
+# CHECK-LABEL: name: test_sub_s8
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_SUB %0, %1
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_mul_s32
+# CHECK-LABEL: name: test_mul_s32
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_MUL %0, %1
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_mul_s16
+# CHECK-LABEL: name: test_mul_s16
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_MUL %0, %1
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_mul_s8
+# CHECK-LABEL: name: test_mul_s8
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_MUL %0, %1
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+
+...
+---
 name:            test_loads
 # CHECK-LABEL: name: test_loads
 legalized:       true
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 0e077b3aee5a1..64c279b0f2187 100644
--- a/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -7,31 +7,32 @@
 
 define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
 entry:
-; CHECK-LABEL: test1
-; CHECK:	ldr	r[[R1:[0-9]+]], [pc, r1]
-; CHECK:	add	r[[R2:[0-9]+]], r1, #48
-; CHECK:	vld1.64	 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	mov	r[[R2:[0-9]+]], r[[R1]]
-; CHECK:	vld1.32	 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK:	vld1.64	 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	add	r[[R1:[0-9]+]], r[[R1]], #32
-; CHECK:	vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	mov	r[[R1:[0-9]+]], sp
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	add	r[[R2:[0-9]+]], r[[R1]], #32
-; CHECK:	vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
-; CHECK:	vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK:	vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	add	r[[R1:[0-9]+]], r0, #48
-; CHECK:	vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	add	r[[R1:[0-9]+]], r0, #32
-; CHECK:	vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
-; CHECK:	vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
+; CHECK-LABEL: test1:
+; CHECK: ldr     r[[R1:[0-9]+]], [pc, r[[R1]]]
+; CHECK: mov     r[[R2:[0-9]+]], r[[R1]]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add     r[[R2:[0-9]+]], r[[R1]], #48
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add     r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: mov     r[[R1:[0-9]+]], #32
+; CHECK: mov     r[[R2:[0-9]+]], sp
+; CHECK: mov     r[[R3:[0-9]+]], r[[R2]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]]
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add     r[[R1:[0-9]+]], r0, #48
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add     r[[R1:[0-9]+]], r0, #32
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
  %retval = alloca <16 x float>, align 16
  %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
  store <16 x float> %0, <16 x float>* %retval
@@ -42,30 +43,32 @@ entry:
 
 define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
 entry:
-; CHECK:	ldr	r[[R1:[0-9]+]], [pc, r1]
-; CHECK:	add	r[[R2:[0-9]+]], r[[R1]], #48
-; CHECK:	vld1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	mov	r[[R2:[0-9]+]], r[[R1]]
-; CHECK:	vld1.32	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK:	vld1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	add	r[[R1:[0-9]+]], r[[R1]], #32
-; CHECK:	vld1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	mov	r[[R1:[0-9]+]], sp
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	orr	r[[R2:[0-9]+]], r[[R1]], #32
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	vld1.32	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	vld1.32	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	vld1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	vld1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; CHECK:	add	r[[R1:[0-9]+]], r0, #48
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	add	r[[R1:[0-9]+]], r0, #32
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; CHECK:	vst1.32	{{{d[0-9]+, d[0-9]+}}}, [r0:128]!
-; CHECK:	vst1.64	{{{d[0-9]+, d[0-9]+}}}, [r0:128]
+; CHECK-LABEL: test2:
+; CHECK: ldr     r[[R1:[0-9]+]], [pc, r[[R1]]]
+; CHECK: mov     r[[R2:[0-9]+]], r[[R1]]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add     r[[R2:[0-9]+]], r[[R1]], #48
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add     r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: mov     r[[R1:[0-9]+]], #32
+; CHECK: mov     r[[R2:[0-9]+]], sp
+; CHECK: mov     r[[R3:[0-9]+]], r[[R2]]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]]
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
+; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add     r[[R1:[0-9]+]], r0, #48
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add     r[[R1:[0-9]+]], r0, #32
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
+; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
 
 
 %retval = alloca <16 x float>, align 16
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index fc85a3a2e6834..699ef6e92a4ff 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -231,6 +231,11 @@
 ; V6:   .eabi_attribute 6, 6
 ; V6:   .eabi_attribute 8, 1
 ;; We assume round-to-nearest by default (matches GCC)
+; V6-NOT:   .eabi_attribute 27
+; V6-NOT:    .eabi_attribute 36
+; V6-NOT:    .eabi_attribute 42
+; V6-NOT:  .eabi_attribute 44
+; V6-NOT:    .eabi_attribute 68
 ; V6-NOT:   .eabi_attribute 19
 ;; The default choice made by llc is for a V6 CPU without an FPU.
 ;; This is not an interesting detail, but for such CPUs, the default intention is to use
@@ -242,13 +247,8 @@
 ; V6:   .eabi_attribute 23, 3
 ; V6:   .eabi_attribute 24, 1
 ; V6:   .eabi_attribute 25, 1
-; V6-NOT:   .eabi_attribute 27
 ; V6-NOT:   .eabi_attribute 28
-; V6-NOT:    .eabi_attribute 36
 ; V6:    .eabi_attribute 38, 1
-; V6-NOT:    .eabi_attribute 42
-; V6-NOT:  .eabi_attribute 44
-; V6-NOT:    .eabi_attribute 68
 
 ; V6-FAST-NOT:   .eabi_attribute 19
 ;; Despite the V6 CPU having no FPU by default, we chose to flush to
@@ -262,9 +262,14 @@
 ;; We emit 6, 12 for both v6-M and v6S-M, technically this is incorrect for
 ;; V6-M, however we don't model the OS extension so this is fine.
 ; V6M:  .eabi_attribute 6, 12
-; V6M-NOT:  .eabi_attribute 7
+; V6M:  .eabi_attribute 7, 77
 ; V6M:  .eabi_attribute 8, 0
 ; V6M:  .eabi_attribute 9, 1
+; V6M-NOT:  .eabi_attribute 27
+; V6M-NOT:  .eabi_attribute 36
+; V6M-NOT:  .eabi_attribute 42
+; V6M-NOT:  .eabi_attribute 44
+; V6M-NOT:  .eabi_attribute 68
 ; V6M-NOT:   .eabi_attribute 19
 ;; The default choice made by llc is for a V6M CPU without an FPU.
 ;; This is not an interesting detail, but for such CPUs, the default intention is to use
@@ -276,13 +281,8 @@
 ; V6M:   .eabi_attribute 23, 3
 ; V6M:  .eabi_attribute 24, 1
 ; V6M:  .eabi_attribute 25, 1
-; V6M-NOT:  .eabi_attribute 27
 ; V6M-NOT:  .eabi_attribute 28
-; V6M-NOT:  .eabi_attribute 36
 ; V6M:  .eabi_attribute 38, 1
-; V6M-NOT:  .eabi_attribute 42
-; V6M-NOT:  .eabi_attribute 44
-; V6M-NOT:  .eabi_attribute 68
 
 ; V6M-FAST-NOT:   .eabi_attribute 19
 ;; Despite the V6M CPU having no FPU by default, we chose to flush to
@@ -298,6 +298,11 @@
 ; ARM1156T2F-S: .eabi_attribute 8, 1
 ; ARM1156T2F-S: .eabi_attribute 9, 2
 ; ARM1156T2F-S: .fpu vfpv2
+; ARM1156T2F-S-NOT: .eabi_attribute 27
+; ARM1156T2F-S-NOT: .eabi_attribute 36
+; ARM1156T2F-S-NOT:    .eabi_attribute 42
+; ARM1156T2F-S-NOT:    .eabi_attribute 44
+; ARM1156T2F-S-NOT:    .eabi_attribute 68
 ; ARM1156T2F-S-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; ARM1156T2F-S: .eabi_attribute 20, 1
@@ -306,13 +311,8 @@
 ; ARM1156T2F-S: .eabi_attribute 23, 3
 ; ARM1156T2F-S: .eabi_attribute 24, 1
 ; ARM1156T2F-S: .eabi_attribute 25, 1
-; ARM1156T2F-S-NOT: .eabi_attribute 27
 ; ARM1156T2F-S-NOT: .eabi_attribute 28
-; ARM1156T2F-S-NOT: .eabi_attribute 36
 ; ARM1156T2F-S: .eabi_attribute 38, 1
-; ARM1156T2F-S-NOT:    .eabi_attribute 42
-; ARM1156T2F-S-NOT:    .eabi_attribute 44
-; ARM1156T2F-S-NOT:    .eabi_attribute 68
 
 ; ARM1156T2F-S-FAST-NOT:   .eabi_attribute 19
 ;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally
@@ -327,6 +327,11 @@
 ; V7M:  .eabi_attribute 7, 77
 ; V7M:  .eabi_attribute 8, 0
 ; V7M:  .eabi_attribute 9, 2
+; V7M-NOT:  .eabi_attribute 27
+; V7M-NOT:  .eabi_attribute 36
+; V7M-NOT:  .eabi_attribute 42
+; V7M-NOT:  .eabi_attribute 44
+; V7M-NOT:  .eabi_attribute 68
 ; V7M-NOT:   .eabi_attribute 19
 ;; The default choice made by llc is for a V7M CPU without an FPU.
 ;; This is not an interesting detail, but for such CPUs, the default intention is to use
@@ -338,13 +343,8 @@
 ; V7M: .eabi_attribute 23, 3
 ; V7M:  .eabi_attribute 24, 1
 ; V7M:  .eabi_attribute 25, 1
-; V7M-NOT:  .eabi_attribute 27
 ; V7M-NOT:  .eabi_attribute 28
-; V7M-NOT:  .eabi_attribute 36
 ; V7M:  .eabi_attribute 38, 1
-; V7M-NOT:  .eabi_attribute 42
-; V7M-NOT:  .eabi_attribute 44
-; V7M-NOT:  .eabi_attribute 68
 
 ; V7M-FAST-NOT:   .eabi_attribute 19
 ;; Despite the V7M CPU having no FPU by default, we chose to flush
@@ -357,6 +357,11 @@
 
 ; V7:      .syntax unified
 ; V7: .eabi_attribute 6, 10
+; V7-NOT: .eabi_attribute 27
+; V7-NOT: .eabi_attribute 36
+; V7-NOT:    .eabi_attribute 42
+; V7-NOT:    .eabi_attribute 44
+; V7-NOT:    .eabi_attribute 68
 ; V7-NOT:   .eabi_attribute 19
 ;; In safe-maths mode we default to an IEEE 754 compliant choice.
 ; V7: .eabi_attribute 20, 1
@@ -365,13 +370,8 @@
 ; V7: .eabi_attribute 23, 3
 ; V7: .eabi_attribute 24, 1
 ; V7: .eabi_attribute 25, 1
-; V7-NOT: .eabi_attribute 27
 ; V7-NOT: .eabi_attribute 28
-; V7-NOT: .eabi_attribute 36
 ; V7: .eabi_attribute 38, 1
-; V7-NOT:    .eabi_attribute 42
-; V7-NOT:    .eabi_attribute 44
-; V7-NOT:    .eabi_attribute 68
 
 ; V7-FAST-NOT:   .eabi_attribute 19
 ;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes
@@ -386,6 +386,9 @@
 ; V7VE: .eabi_attribute 7, 65   @ Tag_CPU_arch_profile
 ; V7VE: .eabi_attribute 8, 1    @ Tag_ARM_ISA_use
 ; V7VE: .eabi_attribute 9, 2    @ Tag_THUMB_ISA_use
+; V7VE: .eabi_attribute 42, 1   @ Tag_MPextension_use
+; V7VE: .eabi_attribute 44, 2   @ Tag_DIV_use
+; V7VE: .eabi_attribute 68, 3   @ Tag_Virtualization_use
 ; V7VE: .eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use
 ; V7VE: .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
 ; V7VE: .eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions
@@ -393,19 +396,16 @@
 ; V7VE: .eabi_attribute 24, 1   @ Tag_ABI_align_needed
 ; V7VE: .eabi_attribute 25, 1   @ Tag_ABI_align_preserved
 ; V7VE: .eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format
-; V7VE: .eabi_attribute 42, 1   @ Tag_MPextension_use
-; V7VE: .eabi_attribute 44, 2   @ Tag_DIV_use
-; V7VE: .eabi_attribute 68, 3   @ Tag_Virtualization_use
 
 ; V8:      .syntax unified
 ; V8: .eabi_attribute 67, "2.09"
 ; V8: .eabi_attribute 6, 14
+; V8-NOT: .eabi_attribute 44
 ; V8-NOT:   .eabi_attribute 19
 ; V8: .eabi_attribute 20, 1
 ; V8: .eabi_attribute 21, 1
 ; V8-NOT: .eabi_attribute 22
 ; V8: .eabi_attribute 23, 3
-; V8-NOT: .eabi_attribute 44
 
 ; V8-FAST-NOT:   .eabi_attribute 19
 ;; The default does have an FPU, and for V8-A, it flushes preserving sign.
@@ -496,6 +496,30 @@
 ; CORTEX-A7-FPUV4: .fpu vfpv4
 
 ; CORTEX-A7-CHECK-NOT:   .eabi_attribute 19
+
+; Tag_FP_HP_extension
+; CORTEX-A7-CHECK: .eabi_attribute      36, 1
+; CORTEX-A7-NOFPU-NOT: .eabi_attribute  36
+; CORTEX-A7-FPUV4: .eabi_attribute      36, 1
+
+; Tag_MPextension_use
+; CORTEX-A7-CHECK: .eabi_attribute      42, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      42, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      42, 1
+
+; Tag_DIV_use
+; CORTEX-A7-CHECK: .eabi_attribute      44, 2
+; CORTEX-A7-NOFPU: .eabi_attribute      44, 2
+; CORTEX-A7-FPUV4: .eabi_attribute      44, 2
+
+; Tag_DSP_extension
+; CORTEX-A7-CHECK-NOT: .eabi_attribute      46
+
+; Tag_Virtualization_use
+; CORTEX-A7-CHECK: .eabi_attribute      68, 3
+; CORTEX-A7-NOFPU: .eabi_attribute      68, 3
+; CORTEX-A7-FPUV4: .eabi_attribute      68, 3
+
 ; Tag_ABI_FP_denormal
 ;; We default to IEEE 754 compliance
 ; CORTEX-A7-CHECK: .eabi_attribute      20, 1
@@ -535,40 +559,20 @@
 ; CORTEX-A7-NOFPU: .eabi_attribute      25, 1
 ; CORTEX-A7-FPUV4: .eabi_attribute      25, 1
 
-; Tag_FP_HP_extension
-; CORTEX-A7-CHECK: .eabi_attribute      36, 1
-; CORTEX-A7-NOFPU-NOT: .eabi_attribute  36
-; CORTEX-A7-FPUV4: .eabi_attribute      36, 1
-
 ; Tag_FP_16bit_format
 ; CORTEX-A7-CHECK: .eabi_attribute      38, 1
 ; CORTEX-A7-NOFPU: .eabi_attribute      38, 1
 ; CORTEX-A7-FPUV4: .eabi_attribute      38, 1
 
-; Tag_MPextension_use
-; CORTEX-A7-CHECK: .eabi_attribute      42, 1
-; CORTEX-A7-NOFPU: .eabi_attribute      42, 1
-; CORTEX-A7-FPUV4: .eabi_attribute      42, 1
-
-; Tag_DIV_use
-; CORTEX-A7-CHECK: .eabi_attribute      44, 2
-; CORTEX-A7-NOFPU: .eabi_attribute      44, 2
-; CORTEX-A7-FPUV4: .eabi_attribute      44, 2
-
-; Tag_DSP_extension
-; CORTEX-A7-CHECK-NOT: .eabi_attribute      46
-
-; Tag_Virtualization_use
-; CORTEX-A7-CHECK: .eabi_attribute      68, 3
-; CORTEX-A7-NOFPU: .eabi_attribute      68, 3
-; CORTEX-A7-FPUV4: .eabi_attribute      68, 3
-
 ; CORTEX-A5-DEFAULT:        .cpu    cortex-a5
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 6, 10
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 7, 65
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 8, 1
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 9, 2
 ; CORTEX-A5-DEFAULT:        .fpu    neon-vfpv4
+; CORTEX-A5-DEFAULT:        .eabi_attribute 42, 1
+; CORTEX-A5-DEFAULT-NOT:        .eabi_attribute 44
+; CORTEX-A5-DEFAULT:        .eabi_attribute 68, 1
 ; CORTEX-A5-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 20, 1
@@ -577,9 +581,6 @@
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 23, 3
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 24, 1
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 25, 1
-; CORTEX-A5-DEFAULT:        .eabi_attribute 42, 1
-; CORTEX-A5-DEFAULT-NOT:        .eabi_attribute 44
-; CORTEX-A5-DEFAULT:        .eabi_attribute 68, 1
 
 ; CORTEX-A5-DEFAULT-FAST-NOT:   .eabi_attribute 19
 ;; The A5 defaults to a VFPv4 FPU, so it flushed preserving the sign when -ffast-math
@@ -595,6 +596,8 @@
 ; CORTEX-A5-NONEON:        .eabi_attribute 8, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 9, 2
 ; CORTEX-A5-NONEON:        .fpu    vfpv4-d16
+; CORTEX-A5-NONEON:        .eabi_attribute 42, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 68, 1
 ;; We default to IEEE 754 compliance
 ; CORTEX-A5-NONEON:        .eabi_attribute 20, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 21, 1
@@ -602,8 +605,6 @@
 ; CORTEX-A5-NONEON:        .eabi_attribute 23, 3
 ; CORTEX-A5-NONEON:        .eabi_attribute 24, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 25, 1
-; CORTEX-A5-NONEON:        .eabi_attribute 42, 1
-; CORTEX-A5-NONEON:        .eabi_attribute 68, 1
 
 ; CORTEX-A5-NONEON-FAST-NOT:   .eabi_attribute 19
 ;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math
@@ -619,6 +620,8 @@
 ; CORTEX-A5-NOFPU:        .eabi_attribute 8, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 9, 2
 ; CORTEX-A5-NOFPU-NOT:    .fpu
+; CORTEX-A5-NOFPU:        .eabi_attribute 42, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 68, 1
 ; CORTEX-A5-NOFPU-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A5-NOFPU:        .eabi_attribute 20, 1
@@ -627,8 +630,6 @@
 ; CORTEX-A5-NOFPU:        .eabi_attribute 23, 3
 ; CORTEX-A5-NOFPU:        .eabi_attribute 24, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 25, 1
-; CORTEX-A5-NOFPU:        .eabi_attribute 42, 1
-; CORTEX-A5-NOFPU:        .eabi_attribute 68, 1
 
 ; CORTEX-A5-NOFPU-FAST-NOT:   .eabi_attribute 19
 ;; Despite there being no FPU, we chose to flush to zero preserving
@@ -645,6 +646,11 @@
 ; CORTEX-A8-SOFT:  .eabi_attribute 8, 1
 ; CORTEX-A8-SOFT:  .eabi_attribute 9, 2
 ; CORTEX-A8-SOFT:  .fpu neon
+; CORTEX-A8-SOFT-NOT:  .eabi_attribute 27
+; CORTEX-A8-SOFT-NOT:  .eabi_attribute 36, 1
+; CORTEX-A8-SOFT-NOT:  .eabi_attribute 42, 1
+; CORTEX-A8-SOFT-NOT:  .eabi_attribute 44
+; CORTEX-A8-SOFT:  .eabi_attribute 68, 1
 ; CORTEX-A8-SOFT-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A8-SOFT:  .eabi_attribute 20, 1
@@ -653,13 +659,8 @@
 ; CORTEX-A8-SOFT:  .eabi_attribute 23, 3
 ; CORTEX-A8-SOFT:  .eabi_attribute 24, 1
 ; CORTEX-A8-SOFT:  .eabi_attribute 25, 1
-; CORTEX-A8-SOFT-NOT:  .eabi_attribute 27
 ; CORTEX-A8-SOFT-NOT:  .eabi_attribute 28
-; CORTEX-A8-SOFT-NOT:  .eabi_attribute 36, 1
 ; CORTEX-A8-SOFT:  .eabi_attribute 38, 1
-; CORTEX-A8-SOFT-NOT:  .eabi_attribute 42, 1
-; CORTEX-A8-SOFT-NOT:  .eabi_attribute 44
-; CORTEX-A8-SOFT:  .eabi_attribute 68, 1
 
 ; CORTEX-A9-SOFT:  .cpu cortex-a9
 ; CORTEX-A9-SOFT:  .eabi_attribute 6, 10
@@ -667,6 +668,11 @@
 ; CORTEX-A9-SOFT:  .eabi_attribute 8, 1
 ; CORTEX-A9-SOFT:  .eabi_attribute 9, 2
 ; CORTEX-A9-SOFT:  .fpu neon
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 27
+; CORTEX-A9-SOFT:  .eabi_attribute 36, 1
+; CORTEX-A9-SOFT:  .eabi_attribute 42, 1
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 44
+; CORTEX-A9-SOFT:  .eabi_attribute 68, 1
 ; CORTEX-A9-SOFT-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A9-SOFT:  .eabi_attribute 20, 1
@@ -675,13 +681,8 @@
 ; CORTEX-A9-SOFT:  .eabi_attribute 23, 3
 ; CORTEX-A9-SOFT:  .eabi_attribute 24, 1
 ; CORTEX-A9-SOFT:  .eabi_attribute 25, 1
-; CORTEX-A9-SOFT-NOT:  .eabi_attribute 27
 ; CORTEX-A9-SOFT-NOT:  .eabi_attribute 28
-; CORTEX-A9-SOFT:  .eabi_attribute 36, 1
 ; CORTEX-A9-SOFT:  .eabi_attribute 38, 1
-; CORTEX-A9-SOFT:  .eabi_attribute 42, 1
-; CORTEX-A9-SOFT-NOT:  .eabi_attribute 44
-; CORTEX-A9-SOFT:  .eabi_attribute 68, 1
 
 ; CORTEX-A8-SOFT-FAST-NOT:   .eabi_attribute 19
 ; CORTEX-A9-SOFT-FAST-NOT:   .eabi_attribute 19
@@ -699,6 +700,10 @@
 ; CORTEX-A8-HARD:  .eabi_attribute 8, 1
 ; CORTEX-A8-HARD:  .eabi_attribute 9, 2
 ; CORTEX-A8-HARD:  .fpu neon
+; CORTEX-A8-HARD-NOT:  .eabi_attribute 27
+; CORTEX-A8-HARD-NOT:  .eabi_attribute 36, 1
+; CORTEX-A8-HARD-NOT:  .eabi_attribute 42, 1
+; CORTEX-A8-HARD:  .eabi_attribute 68, 1
 ; CORTEX-A8-HARD-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A8-HARD:  .eabi_attribute 20, 1
@@ -707,12 +712,8 @@
 ; CORTEX-A8-HARD:  .eabi_attribute 23, 3
 ; CORTEX-A8-HARD:  .eabi_attribute 24, 1
 ; CORTEX-A8-HARD:  .eabi_attribute 25, 1
-; CORTEX-A8-HARD-NOT:  .eabi_attribute 27
 ; CORTEX-A8-HARD:  .eabi_attribute 28, 1
-; CORTEX-A8-HARD-NOT:  .eabi_attribute 36, 1
 ; CORTEX-A8-HARD:  .eabi_attribute 38, 1
-; CORTEX-A8-HARD-NOT:  .eabi_attribute 42, 1
-; CORTEX-A8-HARD:  .eabi_attribute 68, 1
 
 
 
@@ -722,6 +723,10 @@
 ; CORTEX-A9-HARD:  .eabi_attribute 8, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 9, 2
 ; CORTEX-A9-HARD:  .fpu neon
+; CORTEX-A9-HARD-NOT:  .eabi_attribute 27
+; CORTEX-A9-HARD:  .eabi_attribute 36, 1
+; CORTEX-A9-HARD:  .eabi_attribute 42, 1
+; CORTEX-A9-HARD:  .eabi_attribute 68, 1
 ; CORTEX-A9-HARD-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A9-HARD:  .eabi_attribute 20, 1
@@ -730,12 +735,8 @@
 ; CORTEX-A9-HARD:  .eabi_attribute 23, 3
 ; CORTEX-A9-HARD:  .eabi_attribute 24, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 25, 1
-; CORTEX-A9-HARD-NOT:  .eabi_attribute 27
 ; CORTEX-A9-HARD:  .eabi_attribute 28, 1
-; CORTEX-A9-HARD:  .eabi_attribute 36, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 38, 1
-; CORTEX-A9-HARD:  .eabi_attribute 42, 1
-; CORTEX-A9-HARD:  .eabi_attribute 68, 1
 
 ; CORTEX-A8-HARD-FAST-NOT:   .eabi_attribute 19
 ;; The A8 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -759,6 +760,9 @@
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 8, 1
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 9, 2
 ; CORTEX-A12-DEFAULT:  .fpu neon-vfpv4
+; CORTEX-A12-DEFAULT:  .eabi_attribute 42, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 44, 2
+; CORTEX-A12-DEFAULT:  .eabi_attribute 68, 3
 ; CORTEX-A12-DEFAULT-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 20, 1
@@ -767,9 +771,6 @@
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 23, 3
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 24, 1
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 25, 1
-; CORTEX-A12-DEFAULT:  .eabi_attribute 42, 1
-; CORTEX-A12-DEFAULT:  .eabi_attribute 44, 2
-; CORTEX-A12-DEFAULT:  .eabi_attribute 68, 3
 
 ; CORTEX-A12-DEFAULT-FAST-NOT:   .eabi_attribute 19
 ;; The A12 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -785,6 +786,9 @@
 ; CORTEX-A12-NOFPU:  .eabi_attribute 8, 1
 ; CORTEX-A12-NOFPU:  .eabi_attribute 9, 2
 ; CORTEX-A12-NOFPU-NOT:  .fpu
+; CORTEX-A12-NOFPU:  .eabi_attribute 42, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 44, 2
+; CORTEX-A12-NOFPU:  .eabi_attribute 68, 3
 ; CORTEX-A12-NOFPU-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A12-NOFPU:  .eabi_attribute 20, 1
@@ -793,9 +797,6 @@
 ; CORTEX-A12-NOFPU:  .eabi_attribute 23, 3
 ; CORTEX-A12-NOFPU:  .eabi_attribute 24, 1
 ; CORTEX-A12-NOFPU:  .eabi_attribute 25, 1
-; CORTEX-A12-NOFPU:  .eabi_attribute 42, 1
-; CORTEX-A12-NOFPU:  .eabi_attribute 44, 2
-; CORTEX-A12-NOFPU:  .eabi_attribute 68, 3
 
 ; CORTEX-A12-NOFPU-FAST-NOT:   .eabi_attribute 19
 ;; Despite there being no FPU, we chose to flush to zero preserving
@@ -812,6 +813,11 @@
 ; CORTEX-A15: .eabi_attribute 8, 1
 ; CORTEX-A15: .eabi_attribute 9, 2
 ; CORTEX-A15: .fpu neon-vfpv4
+; CORTEX-A15-NOT: .eabi_attribute 27
+; CORTEX-A15: .eabi_attribute 36, 1
+; CORTEX-A15: .eabi_attribute 42, 1
+; CORTEX-A15: .eabi_attribute 44, 2
+; CORTEX-A15: .eabi_attribute 68, 3
 ; CORTEX-A15-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A15: .eabi_attribute 20, 1
@@ -820,13 +826,8 @@
 ; CORTEX-A15: .eabi_attribute 23, 3
 ; CORTEX-A15: .eabi_attribute 24, 1
 ; CORTEX-A15: .eabi_attribute 25, 1
-; CORTEX-A15-NOT: .eabi_attribute 27
 ; CORTEX-A15-NOT: .eabi_attribute 28
-; CORTEX-A15: .eabi_attribute 36, 1
 ; CORTEX-A15: .eabi_attribute 38, 1
-; CORTEX-A15: .eabi_attribute 42, 1
-; CORTEX-A15: .eabi_attribute 44, 2
-; CORTEX-A15: .eabi_attribute 68, 3
 
 ; CORTEX-A15-FAST-NOT:   .eabi_attribute 19
 ;; The A15 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -842,6 +843,9 @@
 ; CORTEX-A17-DEFAULT:  .eabi_attribute 8, 1
 ; CORTEX-A17-DEFAULT:  .eabi_attribute 9, 2
 ; CORTEX-A17-DEFAULT:  .fpu neon-vfpv4
+; CORTEX-A17-DEFAULT:  .eabi_attribute 42, 1
+; CORTEX-A17-DEFAULT:  .eabi_attribute 44, 2
+; CORTEX-A17-DEFAULT:  .eabi_attribute 68, 3
 ; CORTEX-A17-DEFAULT-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A17-DEFAULT:  .eabi_attribute 20, 1
@@ -850,9 +854,6 @@
 ; CORTEX-A17-DEFAULT:  .eabi_attribute 23, 3
 ; CORTEX-A17-DEFAULT:  .eabi_attribute 24, 1
 ; CORTEX-A17-DEFAULT:  .eabi_attribute 25, 1
-; CORTEX-A17-DEFAULT:  .eabi_attribute 42, 1
-; CORTEX-A17-DEFAULT:  .eabi_attribute 44, 2
-; CORTEX-A17-DEFAULT:  .eabi_attribute 68, 3
 
 ; CORTEX-A17-FAST-NOT:   .eabi_attribute 19
 ;; The A17 defaults to a VFPv3 FPU, so it flushes preserving the sign when
@@ -868,6 +869,9 @@
 ; CORTEX-A17-NOFPU:  .eabi_attribute 8, 1
 ; CORTEX-A17-NOFPU:  .eabi_attribute 9, 2
 ; CORTEX-A17-NOFPU-NOT:  .fpu
+; CORTEX-A17-NOFPU:  .eabi_attribute 42, 1
+; CORTEX-A17-NOFPU:  .eabi_attribute 44, 2
+; CORTEX-A17-NOFPU:  .eabi_attribute 68, 3
 ; CORTEX-A17-NOFPU-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A17-NOFPU:  .eabi_attribute 20, 1
@@ -876,9 +880,6 @@
 ; CORTEX-A17-NOFPU:  .eabi_attribute 23, 3
 ; CORTEX-A17-NOFPU:  .eabi_attribute 24, 1
 ; CORTEX-A17-NOFPU:  .eabi_attribute 25, 1
-; CORTEX-A17-NOFPU:  .eabi_attribute 42, 1
-; CORTEX-A17-NOFPU:  .eabi_attribute 44, 2
-; CORTEX-A17-NOFPU:  .eabi_attribute 68, 3
 
 ; CORTEX-A17-NOFPU-NOT:   .eabi_attribute 19
 ;; Despite there being no FPU, we chose to flush to zero preserving
@@ -897,25 +898,25 @@
 
 ; CORTEX-M0:  .cpu cortex-m0
 ; CORTEX-M0:  .eabi_attribute 6, 12
-; CORTEX-M0-NOT:  .eabi_attribute 7
+; CORTEX-M0:  .eabi_attribute 7, 77
 ; CORTEX-M0:  .eabi_attribute 8, 0
 ; CORTEX-M0:  .eabi_attribute 9, 1
+; CORTEX-M0-NOT:  .eabi_attribute 27
+; CORTEX-M0-NOT:  .eabi_attribute 36
+; CORTEX-M0: .eabi_attribute 34, 0
+; CORTEX-M0-NOT:  .eabi_attribute 42
+; CORTEX-M0-NOT:  .eabi_attribute 44
+; CORTEX-M0-NOT:  .eabi_attribute 68
 ; CORTEX-M0-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-M0:  .eabi_attribute 20, 1
 ; CORTEX-M0:  .eabi_attribute 21, 1
 ; CORTEX-M0-NOT:  .eabi_attribute 22
 ; CORTEX-M0:  .eabi_attribute 23, 3
-; CORTEX-M0: .eabi_attribute 34, 0
 ; CORTEX-M0:  .eabi_attribute 24, 1
 ; CORTEX-M0:  .eabi_attribute 25, 1
-; CORTEX-M0-NOT:  .eabi_attribute 27
 ; CORTEX-M0-NOT:  .eabi_attribute 28
-; CORTEX-M0-NOT:  .eabi_attribute 36
 ; CORTEX-M0:  .eabi_attribute 38, 1
-; CORTEX-M0-NOT:  .eabi_attribute 42
-; CORTEX-M0-NOT:  .eabi_attribute 44
-; CORTEX-M0-NOT:  .eabi_attribute 68
 
 ; CORTEX-M0-FAST-NOT:   .eabi_attribute 19
 ;; Despite the M0 CPU having no FPU in this scenario, we chose to
@@ -930,9 +931,14 @@
 
 ; CORTEX-M0PLUS:  .cpu cortex-m0plus
 ; CORTEX-M0PLUS:  .eabi_attribute 6, 12
-; CORTEX-M0PLUS-NOT:  .eabi_attribute 7
+; CORTEX-M0PLUS:  .eabi_attribute 7, 77
 ; CORTEX-M0PLUS:  .eabi_attribute 8, 0
 ; CORTEX-M0PLUS:  .eabi_attribute 9, 1
+; CORTEX-M0PLUS-NOT:  .eabi_attribute 27
+; CORTEX-M0PLUS-NOT:  .eabi_attribute 36
+; CORTEX-M0PLUS-NOT:  .eabi_attribute 42
+; CORTEX-M0PLUS-NOT:  .eabi_attribute 44
+; CORTEX-M0PLUS-NOT:  .eabi_attribute 68
 ; CORTEX-M0PLUS-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-M0PLUS:  .eabi_attribute 20, 1
@@ -941,13 +947,8 @@
 ; CORTEX-M0PLUS:  .eabi_attribute 23, 3
 ; CORTEX-M0PLUS:  .eabi_attribute 24, 1
 ; CORTEX-M0PLUS:  .eabi_attribute 25, 1
-; CORTEX-M0PLUS-NOT:  .eabi_attribute 27
 ; CORTEX-M0PLUS-NOT:  .eabi_attribute 28
-; CORTEX-M0PLUS-NOT:  .eabi_attribute 36
 ; CORTEX-M0PLUS:  .eabi_attribute 38, 1
-; CORTEX-M0PLUS-NOT:  .eabi_attribute 42
-; CORTEX-M0PLUS-NOT:  .eabi_attribute 44
-; CORTEX-M0PLUS-NOT:  .eabi_attribute 68
 
 ; CORTEX-M0PLUS-FAST-NOT:   .eabi_attribute 19
 ;; Despite the M0+ CPU having no FPU in this scenario, we chose to
@@ -962,9 +963,14 @@
 
 ; CORTEX-M1:  .cpu cortex-m1
 ; CORTEX-M1:  .eabi_attribute 6, 12
-; CORTEX-M1-NOT:  .eabi_attribute 7
+; CORTEX-M1:  .eabi_attribute 7, 77
 ; CORTEX-M1:  .eabi_attribute 8, 0
 ; CORTEX-M1:  .eabi_attribute 9, 1
+; CORTEX-M1-NOT:  .eabi_attribute 27
+; CORTEX-M1-NOT:  .eabi_attribute 36
+; CORTEX-M1-NOT:  .eabi_attribute 42
+; CORTEX-M1-NOT:  .eabi_attribute 44
+; CORTEX-M1-NOT:  .eabi_attribute 68
 ; CORTEX-M1-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-M1:  .eabi_attribute 20, 1
@@ -973,13 +979,8 @@
 ; CORTEX-M1:  .eabi_attribute 23, 3
 ; CORTEX-M1:  .eabi_attribute 24, 1
 ; CORTEX-M1:  .eabi_attribute 25, 1
-; CORTEX-M1-NOT:  .eabi_attribute 27
 ; CORTEX-M1-NOT:  .eabi_attribute 28
-; CORTEX-M1-NOT:  .eabi_attribute 36
 ; CORTEX-M1:  .eabi_attribute 38, 1
-; CORTEX-M1-NOT:  .eabi_attribute 42
-; CORTEX-M1-NOT:  .eabi_attribute 44
-; CORTEX-M1-NOT:  .eabi_attribute 68
 
 ; CORTEX-M1-FAST-NOT:   .eabi_attribute 19
 ;; Despite the M1 CPU having no FPU in this scenario, we chose to
@@ -994,9 +995,13 @@
 
 ; SC000:  .cpu sc000
 ; SC000:  .eabi_attribute 6, 12
-; SC000-NOT:  .eabi_attribute 7
+; SC000:  .eabi_attribute 7, 77
 ; SC000:  .eabi_attribute 8, 0
 ; SC000:  .eabi_attribute 9, 1
+; SC000-NOT:  .eabi_attribute 27
+; SC000-NOT:  .eabi_attribute 42
+; SC000-NOT:  .eabi_attribute 44
+; SC000-NOT:  .eabi_attribute 68
 ; SC000-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; SC000:  .eabi_attribute 20, 1
@@ -1005,13 +1010,8 @@
 ; SC000:  .eabi_attribute 23, 3
 ; SC000:  .eabi_attribute 24, 1
 ; SC000:  .eabi_attribute 25, 1
-; SC000-NOT:  .eabi_attribute 27
 ; SC000-NOT:  .eabi_attribute 28
-; SC000-NOT:  .eabi_attribute 36
 ; SC000:  .eabi_attribute 38, 1
-; SC000-NOT:  .eabi_attribute 42
-; SC000-NOT:  .eabi_attribute 44
-; SC000-NOT:  .eabi_attribute 68
 
 ; SC000-FAST-NOT:   .eabi_attribute 19
 ;; Despite the SC000 CPU having no FPU in this scenario, we chose to
@@ -1029,6 +1029,11 @@
 ; CORTEX-M3:  .eabi_attribute 7, 77
 ; CORTEX-M3:  .eabi_attribute 8, 0
 ; CORTEX-M3:  .eabi_attribute 9, 2
+; CORTEX-M3-NOT:  .eabi_attribute 27
+; CORTEX-M3-NOT:  .eabi_attribute 36
+; CORTEX-M3-NOT:  .eabi_attribute 42
+; CORTEX-M3-NOT:  .eabi_attribute 44
+; CORTEX-M3-NOT:  .eabi_attribute 68
 ; CORTEX-M3-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-M3:  .eabi_attribute 20, 1
@@ -1037,13 +1042,8 @@
 ; CORTEX-M3:  .eabi_attribute 23, 3
 ; CORTEX-M3:  .eabi_attribute 24, 1
 ; CORTEX-M3:  .eabi_attribute 25, 1
-; CORTEX-M3-NOT:  .eabi_attribute 27
 ; CORTEX-M3-NOT:  .eabi_attribute 28
-; CORTEX-M3-NOT:  .eabi_attribute 36
 ; CORTEX-M3:  .eabi_attribute 38, 1
-; CORTEX-M3-NOT:  .eabi_attribute 42
-; CORTEX-M3-NOT:  .eabi_attribute 44
-; CORTEX-M3-NOT:  .eabi_attribute 68
 
 ; CORTEX-M3-FAST-NOT:   .eabi_attribute 19
 ;; Despite there being no FPU, we chose to flush to zero preserving
@@ -1059,6 +1059,11 @@
 ; SC300:  .eabi_attribute 7, 77
 ; SC300:  .eabi_attribute 8, 0
 ; SC300:  .eabi_attribute 9, 2
+; SC300-NOT:  .eabi_attribute 27
+; SC300-NOT:  .eabi_attribute 36
+; SC300-NOT:  .eabi_attribute 42
+; SC300-NOT:  .eabi_attribute 44
+; SC300-NOT:  .eabi_attribute 68
 ; SC300-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; SC300:  .eabi_attribute 20, 1
@@ -1067,13 +1072,8 @@
 ; SC300:  .eabi_attribute 23, 3
 ; SC300:  .eabi_attribute 24, 1
 ; SC300:  .eabi_attribute 25, 1
-; SC300-NOT:  .eabi_attribute 27
 ; SC300-NOT:  .eabi_attribute 28
-; SC300-NOT:  .eabi_attribute 36
 ; SC300:  .eabi_attribute 38, 1
-; SC300-NOT:  .eabi_attribute 42
-; SC300-NOT:  .eabi_attribute 44
-; SC300-NOT:  .eabi_attribute 68
 
 ; SC300-FAST-NOT:   .eabi_attribute 19
 ;; Despite there being no FPU, we chose to flush to zero preserving
@@ -1090,6 +1090,11 @@
 ; CORTEX-M4-SOFT:  .eabi_attribute 8, 0
 ; CORTEX-M4-SOFT:  .eabi_attribute 9, 2
 ; CORTEX-M4-SOFT:  .fpu fpv4-sp-d16
+; CORTEX-M4-SOFT:  .eabi_attribute 27, 1
+; CORTEX-M4-SOFT:  .eabi_attribute 36, 1
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 42
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 44
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 68
 ; CORTEX-M4-SOFT-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-M4-SOFT:  .eabi_attribute 20, 1
@@ -1098,13 +1103,8 @@
 ; CORTEX-M4-SOFT:  .eabi_attribute 23, 3
 ; CORTEX-M4-SOFT:  .eabi_attribute 24, 1
 ; CORTEX-M4-SOFT:  .eabi_attribute 25, 1
-; CORTEX-M4-SOFT:  .eabi_attribute 27, 1
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 28
-; CORTEX-M4-SOFT:  .eabi_attribute 36, 1
 ; CORTEX-M4-SOFT:  .eabi_attribute 38, 1
-; CORTEX-M4-SOFT-NOT:  .eabi_attribute 42
-; CORTEX-M4-SOFT-NOT:  .eabi_attribute 44
-; CORTEX-M4-SOFT-NOT:  .eabi_attribute 68
 
 ; CORTEX-M4-SOFT-FAST-NOT:   .eabi_attribute 19
 ;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when
@@ -1120,6 +1120,11 @@
 ; CORTEX-M4-HARD:  .eabi_attribute 8, 0
 ; CORTEX-M4-HARD:  .eabi_attribute 9, 2
 ; CORTEX-M4-HARD:  .fpu fpv4-sp-d16
+; CORTEX-M4-HARD:  .eabi_attribute 27, 1
+; CORTEX-M4-HARD:  .eabi_attribute 36, 1
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 42
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 44
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 68
 ; CORTEX-M4-HARD-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-M4-HARD:  .eabi_attribute 20, 1
@@ -1128,13 +1133,8 @@
 ; CORTEX-M4-HARD:  .eabi_attribute 23, 3
 ; CORTEX-M4-HARD:  .eabi_attribute 24, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 25, 1
-; CORTEX-M4-HARD:  .eabi_attribute 27, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 28, 1
-; CORTEX-M4-HARD:  .eabi_attribute 36, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 38, 1
-; CORTEX-M4-HARD-NOT:  .eabi_attribute 42
-; CORTEX-M4-HARD-NOT:  .eabi_attribute 44
-; CORTEX-M4-HARD-NOT:  .eabi_attribute 68
 
 ; CORTEX-M4-HARD-FAST-NOT:   .eabi_attribute 19
 ;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when
@@ -1152,6 +1152,11 @@
 ; CORTEX-M7-SOFT-NOT: .fpu
 ; CORTEX-M7-SINGLE:  .fpu fpv5-sp-d16
 ; CORTEX-M7-DOUBLE:  .fpu fpv5-d16
+; CORTEX-M7-SOFT-NOT: .eabi_attribute 27
+; CORTEX-M7-SINGLE:  .eabi_attribute 27, 1
+; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27
+; CORTEX-M7:  .eabi_attribute 36, 1
+; CORTEX-M7-NOT:  .eabi_attribute 44
 ; CORTEX-M7:  .eabi_attribute 17, 1
 ; CORTEX-M7-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
@@ -1161,12 +1166,7 @@
 ; CORTEX-M7:  .eabi_attribute 23, 3
 ; CORTEX-M7:  .eabi_attribute 24, 1
 ; CORTEX-M7:  .eabi_attribute 25, 1
-; CORTEX-M7-SOFT-NOT: .eabi_attribute 27
-; CORTEX-M7-SINGLE:  .eabi_attribute 27, 1
-; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27
-; CORTEX-M7:  .eabi_attribute 36, 1
 ; CORTEX-M7:  .eabi_attribute 38, 1
-; CORTEX-M7-NOT:  .eabi_attribute 44
 ; CORTEX-M7:  .eabi_attribute 14, 0
 
 ; CORTEX-M7-NOFPU-FAST-NOT:   .eabi_attribute 19
@@ -1186,6 +1186,10 @@
 ; CORTEX-R4:  .eabi_attribute 8, 1
 ; CORTEX-R4:  .eabi_attribute 9, 2
 ; CORTEX-R4-NOT:  .fpu vfpv3-d16
+; CORTEX-R4-NOT:  .eabi_attribute 36
+; CORTEX-R4-NOT:  .eabi_attribute 42
+; CORTEX-R4-NOT:  .eabi_attribute 44
+; CORTEX-R4-NOT:  .eabi_attribute 68
 ; CORTEX-R4-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R4:  .eabi_attribute 20, 1
@@ -1195,11 +1199,7 @@
 ; CORTEX-R4:  .eabi_attribute 24, 1
 ; CORTEX-R4:  .eabi_attribute 25, 1
 ; CORTEX-R4-NOT:  .eabi_attribute 28
-; CORTEX-R4-NOT:  .eabi_attribute 36
 ; CORTEX-R4:  .eabi_attribute 38, 1
-; CORTEX-R4-NOT:  .eabi_attribute 42
-; CORTEX-R4-NOT:  .eabi_attribute 44
-; CORTEX-R4-NOT:  .eabi_attribute 68
 
 ; CORTEX-R4F:  .cpu cortex-r4f
 ; CORTEX-R4F:  .eabi_attribute 6, 10
@@ -1207,6 +1207,11 @@
 ; CORTEX-R4F:  .eabi_attribute 8, 1
 ; CORTEX-R4F:  .eabi_attribute 9, 2
 ; CORTEX-R4F:  .fpu vfpv3-d16
+; CORTEX-R4F-NOT:  .eabi_attribute 27, 1
+; CORTEX-R4F-NOT:  .eabi_attribute 36
+; CORTEX-R4F-NOT:  .eabi_attribute 42
+; CORTEX-R4F-NOT:  .eabi_attribute 44
+; CORTEX-R4F-NOT:  .eabi_attribute 68
 ; CORTEX-R4F-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R4F:  .eabi_attribute 20, 1
@@ -1215,13 +1220,8 @@
 ; CORTEX-R4F:  .eabi_attribute 23, 3
 ; CORTEX-R4F:  .eabi_attribute 24, 1
 ; CORTEX-R4F:  .eabi_attribute 25, 1
-; CORTEX-R4F-NOT:  .eabi_attribute 27, 1
 ; CORTEX-R4F-NOT:  .eabi_attribute 28
-; CORTEX-R4F-NOT:  .eabi_attribute 36
 ; CORTEX-R4F:  .eabi_attribute 38, 1
-; CORTEX-R4F-NOT:  .eabi_attribute 42
-; CORTEX-R4F-NOT:  .eabi_attribute 44
-; CORTEX-R4F-NOT:  .eabi_attribute 68
 
 ; CORTEX-R5:  .cpu cortex-r5
 ; CORTEX-R5:  .eabi_attribute 6, 10
@@ -1229,6 +1229,11 @@
 ; CORTEX-R5:  .eabi_attribute 8, 1
 ; CORTEX-R5:  .eabi_attribute 9, 2
 ; CORTEX-R5:  .fpu vfpv3-d16
+; CORTEX-R5-NOT:  .eabi_attribute 27, 1
+; CORTEX-R5-NOT:  .eabi_attribute 36
+; CORTEX-R5:  .eabi_attribute 44, 2
+; CORTEX-R5-NOT:  .eabi_attribute 42
+; CORTEX-R5-NOT:  .eabi_attribute 68
 ; CORTEX-R5-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R5:  .eabi_attribute 20, 1
@@ -1237,13 +1242,8 @@
 ; CORTEX-R5:  .eabi_attribute 23, 3
 ; CORTEX-R5:  .eabi_attribute 24, 1
 ; CORTEX-R5:  .eabi_attribute 25, 1
-; CORTEX-R5-NOT:  .eabi_attribute 27, 1
 ; CORTEX-R5-NOT:  .eabi_attribute 28
-; CORTEX-R5-NOT:  .eabi_attribute 36
 ; CORTEX-R5:  .eabi_attribute 38, 1
-; CORTEX-R5-NOT:  .eabi_attribute 42
-; CORTEX-R5:  .eabi_attribute 44, 2
-; CORTEX-R5-NOT:  .eabi_attribute 68
 
 ; CORTEX-R5-FAST-NOT:   .eabi_attribute 19
 ;; The R5 has the VFPv3 FP unit, which always flushes preserving sign.
@@ -1258,6 +1258,10 @@
 ; CORTEX-R7:  .eabi_attribute 8, 1
 ; CORTEX-R7:  .eabi_attribute 9, 2
 ; CORTEX-R7:  .fpu vfpv3-d16-fp16
+; CORTEX-R7:  .eabi_attribute 36, 1
+; CORTEX-R7:  .eabi_attribute 42, 1
+; CORTEX-R7:  .eabi_attribute 44, 2
+; CORTEX-R7-NOT:  .eabi_attribute 68
 ; CORTEX-R7-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R7:  .eabi_attribute 20, 1
@@ -1267,11 +1271,7 @@
 ; CORTEX-R7:  .eabi_attribute 24, 1
 ; CORTEX-R7:  .eabi_attribute 25, 1
 ; CORTEX-R7-NOT:  .eabi_attribute 28
-; CORTEX-R7:  .eabi_attribute 36, 1
 ; CORTEX-R7:  .eabi_attribute 38, 1
-; CORTEX-R7:  .eabi_attribute 42, 1
-; CORTEX-R7:  .eabi_attribute 44, 2
-; CORTEX-R7-NOT:  .eabi_attribute 68
 
 ; CORTEX-R7-FAST-NOT:   .eabi_attribute 19
 ;; The R7 has the VFPv3 FP unit, which always flushes preserving sign.
@@ -1286,6 +1286,10 @@
 ; CORTEX-R8:  .eabi_attribute 8, 1
 ; CORTEX-R8:  .eabi_attribute 9, 2
 ; CORTEX-R8:  .fpu vfpv3-d16-fp16
+; CORTEX-R8:  .eabi_attribute 36, 1
+; CORTEX-R8:  .eabi_attribute 42, 1
+; CORTEX-R8:  .eabi_attribute 44, 2
+; CORTEX-R8-NOT:  .eabi_attribute 68
 ; CORTEX-R8-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R8:  .eabi_attribute 20, 1
@@ -1295,11 +1299,7 @@
 ; CORTEX-R8:  .eabi_attribute 24, 1
 ; CORTEX-R8:  .eabi_attribute 25, 1
 ; CORTEX-R8-NOT:  .eabi_attribute 28
-; CORTEX-R8:  .eabi_attribute 36, 1
 ; CORTEX-R8:  .eabi_attribute 38, 1
-; CORTEX-R8:  .eabi_attribute 42, 1
-; CORTEX-R8:  .eabi_attribute 44, 2
-; CORTEX-R8-NOT:  .eabi_attribute 68
 
 ; CORTEX-R8-FAST-NOT:   .eabi_attribute 19
 ;; The R8 has the VFPv3 FP unit, which always flushes preserving sign.
@@ -1315,6 +1315,11 @@
 ; CORTEX-A32:  .eabi_attribute 9, 2
 ; CORTEX-A32:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A32:  .eabi_attribute 12, 3
+; CORTEX-A32-NOT:  .eabi_attribute 27
+; CORTEX-A32:  .eabi_attribute 36, 1
+; CORTEX-A32:  .eabi_attribute 42, 1
+; CORTEX-A32-NOT:  .eabi_attribute 44
+; CORTEX-A32:  .eabi_attribute 68, 3
 ; CORTEX-A32-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A32:  .eabi_attribute 20, 1
@@ -1323,13 +1328,8 @@
 ; CORTEX-A32:  .eabi_attribute 23, 3
 ; CORTEX-A32:  .eabi_attribute 24, 1
 ; CORTEX-A32:  .eabi_attribute 25, 1
-; CORTEX-A32-NOT:  .eabi_attribute 27
 ; CORTEX-A32-NOT:  .eabi_attribute 28
-; CORTEX-A32:  .eabi_attribute 36, 1
 ; CORTEX-A32:  .eabi_attribute 38, 1
-; CORTEX-A32:  .eabi_attribute 42, 1
-; CORTEX-A32-NOT:  .eabi_attribute 44
-; CORTEX-A32:  .eabi_attribute 68, 3
 
 ; CORTEX-A32-FAST-NOT:   .eabi_attribute 19
 ;; The A32 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1343,20 +1343,20 @@
 ; CORTEX-M23:  .eabi_attribute 7, 77
 ; CORTEX-M23:  .eabi_attribute 8, 0
 ; CORTEX-M23:  .eabi_attribute 9, 3
+; CORTEX-M23-NOT:  .eabi_attribute 27
+; CORTEX-M23:  .eabi_attribute 34, 1
+; CORTEX-M23-NOT:  .eabi_attribute 44
 ; CORTEX-M23:  .eabi_attribute 17, 1
 ;; We default to IEEE 754 compliance
 ; CORTEX-M23-NOT:   .eabi_attribute 19
 ; CORTEX-M23:  .eabi_attribute 20, 1
 ; CORTEX-M23:  .eabi_attribute 21, 1
 ; CORTEX-M23:  .eabi_attribute 23, 3
-; CORTEX-M23:  .eabi_attribute 34, 1
 ; CORTEX-M23:  .eabi_attribute 24, 1
-; CORTEX-M23-NOT:  .eabi_attribute 27
 ; CORTEX-M23-NOT:  .eabi_attribute 28
 ; CORTEX-M23:  .eabi_attribute 25, 1
 ; CORTEX-M23:  .eabi_attribute 38, 1
 ; CORTEX-M23:  .eabi_attribute 14, 0
-; CORTEX-M23-NOT:  .eabi_attribute 44
 
 ; CORTEX-M33:  .cpu cortex-m33
 ; CORTEX-M33:  .eabi_attribute 6, 17
@@ -1364,21 +1364,21 @@
 ; CORTEX-M33:  .eabi_attribute 8, 0
 ; CORTEX-M33:  .eabi_attribute 9, 3
 ; CORTEX-M33:  .fpu fpv5-sp-d16
+; CORTEX-M33:  .eabi_attribute 27, 1
+; CORTEX-M33:  .eabi_attribute 36, 1
+; CORTEX-M33-NOT:  .eabi_attribute 44
+; CORTEX-M33:  .eabi_attribute 46, 1
+; CORTEX-M33:  .eabi_attribute 34, 1
 ; CORTEX-M33:  .eabi_attribute 17, 1
 ;; We default to IEEE 754 compliance
 ; CORTEX-M23-NOT:   .eabi_attribute 19
 ; CORTEX-M33:  .eabi_attribute 20, 1
 ; CORTEX-M33:  .eabi_attribute 21, 1
 ; CORTEX-M33:  .eabi_attribute 23, 3
-; CORTEX-M33:  .eabi_attribute 34, 1
 ; CORTEX-M33:  .eabi_attribute 24, 1
 ; CORTEX-M33:  .eabi_attribute 25, 1
-; CORTEX-M33:  .eabi_attribute 27, 1
 ; CORTEX-M33-NOT:  .eabi_attribute 28
-; CORTEX-M33:  .eabi_attribute 36, 1
 ; CORTEX-M33:  .eabi_attribute 38, 1
-; CORTEX-M33:  .eabi_attribute 46, 1
-; CORTEX-M33-NOT:  .eabi_attribute 44
 ; CORTEX-M33:  .eabi_attribute 14, 0
 
 ; CORTEX-M33-FAST-NOT:   .eabi_attribute 19
@@ -1394,6 +1394,11 @@
 ; CORTEX-A35:  .eabi_attribute 9, 2
 ; CORTEX-A35:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A35:  .eabi_attribute 12, 3
+; CORTEX-A35-NOT:  .eabi_attribute 27
+; CORTEX-A35:  .eabi_attribute 36, 1
+; CORTEX-A35:  .eabi_attribute 42, 1
+; CORTEX-A35-NOT:  .eabi_attribute 44
+; CORTEX-A35:  .eabi_attribute 68, 3
 ; CORTEX-A35-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A35:  .eabi_attribute 20, 1
@@ -1402,13 +1407,8 @@
 ; CORTEX-A35:  .eabi_attribute 23, 3
 ; CORTEX-A35:  .eabi_attribute 24, 1
 ; CORTEX-A35:  .eabi_attribute 25, 1
-; CORTEX-A35-NOT:  .eabi_attribute 27
 ; CORTEX-A35-NOT:  .eabi_attribute 28
-; CORTEX-A35:  .eabi_attribute 36, 1
 ; CORTEX-A35:  .eabi_attribute 38, 1
-; CORTEX-A35:  .eabi_attribute 42, 1
-; CORTEX-A35-NOT:  .eabi_attribute 44
-; CORTEX-A35:  .eabi_attribute 68, 3
 
 ; CORTEX-A35-FAST-NOT:   .eabi_attribute 19
 ;; The A35 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1424,6 +1424,11 @@
 ; CORTEX-A53:  .eabi_attribute 9, 2
 ; CORTEX-A53:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A53:  .eabi_attribute 12, 3
+; CORTEX-A53-NOT:  .eabi_attribute 27
+; CORTEX-A53:  .eabi_attribute 36, 1
+; CORTEX-A53:  .eabi_attribute 42, 1
+; CORTEX-A53-NOT:  .eabi_attribute 44
+; CORTEX-A53:  .eabi_attribute 68, 3
 ; CORTEX-A53-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A53:  .eabi_attribute 20, 1
@@ -1432,13 +1437,8 @@
 ; CORTEX-A53:  .eabi_attribute 23, 3
 ; CORTEX-A53:  .eabi_attribute 24, 1
 ; CORTEX-A53:  .eabi_attribute 25, 1
-; CORTEX-A53-NOT:  .eabi_attribute 27
 ; CORTEX-A53-NOT:  .eabi_attribute 28
-; CORTEX-A53:  .eabi_attribute 36, 1
 ; CORTEX-A53:  .eabi_attribute 38, 1
-; CORTEX-A53:  .eabi_attribute 42, 1
-; CORTEX-A53-NOT:  .eabi_attribute 44
-; CORTEX-A53:  .eabi_attribute 68, 3
 
 ; CORTEX-A53-FAST-NOT:   .eabi_attribute 19
 ;; The A53 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1454,6 +1454,11 @@
 ; CORTEX-A57:  .eabi_attribute 9, 2
 ; CORTEX-A57:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A57:  .eabi_attribute 12, 3
+; CORTEX-A57-NOT:  .eabi_attribute 27
+; CORTEX-A57:  .eabi_attribute 36, 1
+; CORTEX-A57:  .eabi_attribute 42, 1
+; CORTEX-A57-NOT:  .eabi_attribute 44
+; CORTEX-A57:  .eabi_attribute 68, 3
 ; CORTEX-A57-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A57:  .eabi_attribute 20, 1
@@ -1462,13 +1467,8 @@
 ; CORTEX-A57:  .eabi_attribute 23, 3
 ; CORTEX-A57:  .eabi_attribute 24, 1
 ; CORTEX-A57:  .eabi_attribute 25, 1
-; CORTEX-A57-NOT:  .eabi_attribute 27
 ; CORTEX-A57-NOT:  .eabi_attribute 28
-; CORTEX-A57:  .eabi_attribute 36, 1
 ; CORTEX-A57:  .eabi_attribute 38, 1
-; CORTEX-A57:  .eabi_attribute 42, 1
-; CORTEX-A57-NOT:  .eabi_attribute 44
-; CORTEX-A57:  .eabi_attribute 68, 3
 
 ; CORTEX-A57-FAST-NOT:   .eabi_attribute 19
 ;; The A57 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1484,6 +1484,11 @@
 ; CORTEX-A72:  .eabi_attribute 9, 2
 ; CORTEX-A72:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A72:  .eabi_attribute 12, 3
+; CORTEX-A72-NOT:  .eabi_attribute 27
+; CORTEX-A72:  .eabi_attribute 36, 1
+; CORTEX-A72:  .eabi_attribute 42, 1
+; CORTEX-A72-NOT:  .eabi_attribute 44
+; CORTEX-A72:  .eabi_attribute 68, 3
 ; CORTEX-A72-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A72:  .eabi_attribute 20, 1
@@ -1492,13 +1497,8 @@
 ; CORTEX-A72:  .eabi_attribute 23, 3
 ; CORTEX-A72:  .eabi_attribute 24, 1
 ; CORTEX-A72:  .eabi_attribute 25, 1
-; CORTEX-A72-NOT:  .eabi_attribute 27
 ; CORTEX-A72-NOT:  .eabi_attribute 28
-; CORTEX-A72:  .eabi_attribute 36, 1
 ; CORTEX-A72:  .eabi_attribute 38, 1
-; CORTEX-A72:  .eabi_attribute 42, 1
-; CORTEX-A72-NOT:  .eabi_attribute 44
-; CORTEX-A72:  .eabi_attribute 68, 3
 
 ; CORTEX-A72-FAST-NOT:   .eabi_attribute 19
 ;; The A72 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1514,6 +1514,11 @@
 ; CORTEX-A73:  .eabi_attribute 9, 2
 ; CORTEX-A73:  .fpu  crypto-neon-fp-armv8
 ; CORTEX-A73:  .eabi_attribute 12, 3
+; CORTEX-A73-NOT: .eabi_attribute 27
+; CORTEX-A73:  .eabi_attribute 36, 1
+; CORTEX-A73:  .eabi_attribute 42, 1
+; CORTEX-A73-NOT: .eabi_attribute 44
+; CORTEX-A73:  .eabi_attribute 68, 3
 ; CORTEX-A73-NOT: .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-A73:  .eabi_attribute 20, 1
@@ -1522,14 +1527,9 @@
 ; CORTEX-A73:  .eabi_attribute 23, 3
 ; CORTEX-A73:  .eabi_attribute 24, 1
 ; CORTEX-A73:  .eabi_attribute 25, 1
-; CORTEX-A73-NOT: .eabi_attribute 27
 ; CORTEX-A73-NOT: .eabi_attribute 28
-; CORTEX-A73:  .eabi_attribute 36, 1
 ; CORTEX-A73:  .eabi_attribute 38, 1
-; CORTEX-A73:  .eabi_attribute 42, 1
-; CORTEX-A73-NOT: .eabi_attribute 44
 ; CORTEX-A73:  .eabi_attribute 14, 0
-; CORTEX-A73:  .eabi_attribute 68, 3
 
 ; EXYNOS-M1:  .cpu exynos-m1
 ; EXYNOS-M1:  .eabi_attribute 6, 14
@@ -1538,6 +1538,11 @@
 ; EXYNOS-M1:  .eabi_attribute 9, 2
 ; EXYNOS-M1:  .fpu crypto-neon-fp-armv8
 ; EXYNOS-M1:  .eabi_attribute 12, 3
+; EXYNOS-M1-NOT:  .eabi_attribute 27
+; EXYNOS-M1:  .eabi_attribute 36, 1
+; EXYNOS-M1:  .eabi_attribute 42, 1
+; EXYNOS-M1-NOT:  .eabi_attribute 44
+; EXYNOS-M1:  .eabi_attribute 68, 3
 ; EXYNOS-M1-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; EXYNOS-M1:  .eabi_attribute 20, 1
@@ -1546,13 +1551,8 @@
 ; EXYNOS-M1:  .eabi_attribute 23, 3
 ; EXYNOS-M1:  .eabi_attribute 24, 1
 ; EXYNOS-M1:  .eabi_attribute 25, 1
-; EXYNOS-M1-NOT:  .eabi_attribute 27
 ; EXYNOS-M1-NOT:  .eabi_attribute 28
-; EXYNOS-M1:  .eabi_attribute 36, 1
 ; EXYNOS-M1:  .eabi_attribute 38, 1
-; EXYNOS-M1:  .eabi_attribute 42, 1
-; EXYNOS-M1-NOT:  .eabi_attribute 44
-; EXYNOS-M1:  .eabi_attribute 68, 3
 
 ; EXYNOS-M1-FAST-NOT:   .eabi_attribute 19
 ;; The exynos-m1 has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1568,6 +1568,11 @@
 ; EXYNOS-M2:  .eabi_attribute 9, 2
 ; EXYNOS-M2:  .fpu crypto-neon-fp-armv8
 ; EXYNOS-M2:  .eabi_attribute 12, 3
+; EXYNOS-M2-NOT:  .eabi_attribute 27
+; EXYNOS-M2:  .eabi_attribute 36, 1
+; EXYNOS-M2:  .eabi_attribute 42, 1
+; EXYNOS-M2-NOT:  .eabi_attribute 44
+; EXYNOS-M2:  .eabi_attribute 68, 3
 ; EXYNOS-M2-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; EXYNOS-M2:  .eabi_attribute 20, 1
@@ -1576,13 +1581,8 @@
 ; EXYNOS-M2:  .eabi_attribute 23, 3
 ; EXYNOS-M2:  .eabi_attribute 24, 1
 ; EXYNOS-M2:  .eabi_attribute 25, 1
-; EXYNOS-M2-NOT:  .eabi_attribute 27
 ; EXYNOS-M2-NOT:  .eabi_attribute 28
-; EXYNOS-M2:  .eabi_attribute 36, 1
 ; EXYNOS-M2:  .eabi_attribute 38, 1
-; EXYNOS-M2:  .eabi_attribute 42, 1
-; EXYNOS-M2-NOT:  .eabi_attribute 44
-; EXYNOS-M2:  .eabi_attribute 68, 3
 
 ; EXYNOS-M3:  .cpu exynos-m3
 ; EXYNOS-M3:  .eabi_attribute 6, 14
@@ -1591,6 +1591,11 @@
 ; EXYNOS-M3:  .eabi_attribute 9, 2
 ; EXYNOS-M3:  .fpu crypto-neon-fp-armv8
 ; EXYNOS-M3:  .eabi_attribute 12, 3
+; EXYNOS-M3-NOT:  .eabi_attribute 27
+; EXYNOS-M3:  .eabi_attribute 36, 1
+; EXYNOS-M3:  .eabi_attribute 42, 1
+; EXYNOS-M3-NOT:  .eabi_attribute 44
+; EXYNOS-M3:  .eabi_attribute 68, 3
 ; EXYNOS-M3-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; EXYNOS-M3:  .eabi_attribute 20, 1
@@ -1599,13 +1604,8 @@
 ; EXYNOS-M3:  .eabi_attribute 23, 3
 ; EXYNOS-M3:  .eabi_attribute 24, 1
 ; EXYNOS-M3:  .eabi_attribute 25, 1
-; EXYNOS-M3-NOT:  .eabi_attribute 27
 ; EXYNOS-M3-NOT:  .eabi_attribute 28
-; EXYNOS-M3:  .eabi_attribute 36, 1
 ; EXYNOS-M3:  .eabi_attribute 38, 1
-; EXYNOS-M3:  .eabi_attribute 42, 1
-; EXYNOS-M3-NOT:  .eabi_attribute 44
-; EXYNOS-M3:  .eabi_attribute 68, 3
 
 ; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16
 ; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16
@@ -1619,6 +1619,11 @@
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 9, 2
 ; GENERIC-ARMV8_1-A:  .fpu crypto-neon-fp-armv8
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 12, 4
+; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 27
+; GENERIC-ARMV8_1-A:  .eabi_attribute 36, 1
+; GENERIC-ARMV8_1-A:  .eabi_attribute 42, 1
+; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 44
+; GENERIC-ARMV8_1-A:  .eabi_attribute 68, 3
 ; GENERIC-ARMV8_1-A-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 20, 1
@@ -1627,13 +1632,8 @@
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 23, 3
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 24, 1
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 25, 1
-; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 27
 ; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 28
-; GENERIC-ARMV8_1-A:  .eabi_attribute 36, 1
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 38, 1
-; GENERIC-ARMV8_1-A:  .eabi_attribute 42, 1
-; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 44
-; GENERIC-ARMV8_1-A:  .eabi_attribute 68, 3
 
 ; GENERIC-ARMV8_1-A-FAST-NOT:   .eabi_attribute 19
 ;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign.
@@ -1670,23 +1670,16 @@
 ; ARMv8R-SP-NOT: .eabi_attribute 12
 ; ARMv8R-NEON: .fpu    neon-fp-armv8
 ; ARMv8R-NEON: .eabi_attribute 12, 3   @ Tag_Advanced_SIMD_arch
-; ARMv8R: .eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use
-; ARMv8R: .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
-; ARMv8R: .eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions
-; ARMv8R: .eabi_attribute 23, 3   @ Tag_ABI_FP_number_model
-; ARMv8R: .eabi_attribute 34, 1   @ Tag_CPU_unaligned_access
-; ARMv8R: .eabi_attribute 24, 1   @ Tag_ABI_align_needed
-; ARMv8R: .eabi_attribute 25, 1   @ Tag_ABI_align_preserved
 ; ARMv8R-NOFPU-NOT: .eabi_attribute 27
 ; ARMv8R-SP: .eabi_attribute 27, 1   @ Tag_ABI_HardFP_use
 ; ARMv8R-NEON-NOT: .eabi_attribute 27
 ; ARMv8R-NOFPU-NOT: .eabi_attribute 36
 ; ARMv8R-SP: .eabi_attribute 36, 1   @ Tag_FP_HP_extension
 ; ARMv8R-NEON: .eabi_attribute 36, 1   @ Tag_FP_HP_extension
-; ARMv8R: .eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format
 ; ARMv8R: .eabi_attribute 42, 1   @ Tag_MPextension_use
-; ARMv8R: .eabi_attribute 14, 0   @ Tag_ABI_PCS_R9_use
 ; ARMv8R: .eabi_attribute 68, 2   @ Tag_Virtualization_use
+; ARMv8R: .eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format
+; ARMv8R: .eabi_attribute 14, 0   @ Tag_ABI_PCS_R9_use
 
 define i32 @f(i64 %z) {
     ret i32 0
diff --git a/test/CodeGen/ARM/darwin-tls-preserved.ll b/test/CodeGen/ARM/darwin-tls-preserved.ll
new file mode 100644
index 0000000000000..4969fabfd9b3c
--- /dev/null
+++ b/test/CodeGen/ARM/darwin-tls-preserved.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -arm-atomic-cfg-tidy=0 -o - %s | FileCheck %s
+
+@tls_var = thread_local global i32 0
+
+; r9 and r12 can be live across the asm, but those get clobbered by the TLS
+; access (in a different BB to order it).
+define i32 @test_regs_preserved(i32* %ptr1, i32* %ptr2, i1 %tst1) {
+; CHECK-LABEL: test_regs_preserved:
+; CHECK: str {{.*}}, [sp
+; CHECK: mov {{.*}}, r12
+entry:
+  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r10},~{r11},~{r13},~{lr}"()
+  br i1 %tst1, label %get_tls, label %done
+
+get_tls:
+  %val = load i32, i32* @tls_var
+  br label %done
+
+done:
+  %res = phi i32 [%val, %get_tls], [0, %entry]
+  store i32 42, i32* %ptr1
+  store i32 42, i32* %ptr2
+  ret i32 %res
+}
diff --git a/test/CodeGen/ARM/divmod-hwdiv.ll b/test/CodeGen/ARM/divmod-hwdiv.ll
new file mode 100644
index 0000000000000..4cc316ffa3ea6
--- /dev/null
+++ b/test/CodeGen/ARM/divmod-hwdiv.ll
@@ -0,0 +1,37 @@
+; The hwdiv subtarget feature should only influence thumb, not arm.
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,THUMB-HWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+
+; The hwdiv-arm subtarget feature should only influence arm, not thumb.
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,ARM-HWDIV
+; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
+
+define arm_aapcscc i32 @test_i32_srem(i32 %x, i32 %y) {
+; ALL-LABEL: test_i32_srem:
+; ARM-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
+; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
+; ARM-HWDIV: sub r0, r0, [[P]]
+; THUMB-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
+; THUMB-HWDIV: mls r0, [[Q]], r1, r0
+; AEABI-NOHWDIV: bl __aeabi_idivmod
+; AEABI-NOHWDIV: mov r0, r1
+  %r = srem i32 %x, %y
+  ret i32 %r
+}
+
+define arm_aapcscc i32 @test_i32_urem(i32 %x, i32 %y) {
+; ALL-LABEL: test_i32_urem:
+; ARM-HWDIV: udiv [[Q:r[0-9]+]], r0, r1
+; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
+; ARM-HWDIV: sub r0, r0, [[P]]
+; THUMB-HWDIV: udiv [[Q:r[0-9]+]], r0, r1
+; THUMB-HWDIV: mls r0, [[Q]], r1, r0
+; AEABI-NOHWDIV: bl __aeabi_uidivmod
+; AEABI-NOHWDIV: mov r0, r1
+  %r = urem i32 %x, %y
+  ret i32 %r
+}
diff --git a/test/CodeGen/ARM/fpoffset_overflow.mir b/test/CodeGen/ARM/fpoffset_overflow.mir
new file mode 100644
index 0000000000000..9c6cd931b1532
--- /dev/null
+++ b/test/CodeGen/ARM/fpoffset_overflow.mir
@@ -0,0 +1,94 @@
+# RUN: llc -o - %s -mtriple=thumbv7-- -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s
+---
+# This should trigger an emergency spill in the register scavenger because the
+# frame offset into the large argument is too large.
+# CHECK-LABEL: name: func0
+# CHECK: t2STRi12 killed %r7, %sp, 0, 14, _ :: (store 4 into %stack.0)
+# CHECK: %r7 = t2ADDri killed %sp, 4096, 14, _, _
+# CHECK: %r11 = t2LDRi12 killed %r7, 36, 14, _ :: (load 4)
+# CHECK: %r7 = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0)
+name: func0
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, offset: 4084, size: 4, alignment: 4, isImmutable: true,
+      isAliased: false }
+  - { id: 1, offset: -12, size: 4096, alignment: 4, isImmutable: false,
+      isAliased: false }
+body: |
+  bb.0:
+    %r0 = IMPLICIT_DEF
+    %r1 = IMPLICIT_DEF
+    %r2 = IMPLICIT_DEF
+    %r3 = IMPLICIT_DEF
+    %r4 = IMPLICIT_DEF
+    %r5 = IMPLICIT_DEF
+    %r6 = IMPLICIT_DEF
+    %r8 = IMPLICIT_DEF
+    %r9 = IMPLICIT_DEF
+    %r10 = IMPLICIT_DEF
+    %r11 = IMPLICIT_DEF
+    %r12 = IMPLICIT_DEF
+    %lr = IMPLICIT_DEF
+
+    %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4)
+
+    KILL %r0
+    KILL %r1
+    KILL %r2
+    KILL %r3
+    KILL %r4
+    KILL %r5
+    KILL %r6
+    KILL %r8
+    KILL %r9
+    KILL %r10
+    KILL %r11
+    KILL %r12
+    KILL %lr
+...
+---
+# This should not trigger an emergency spill yet.
+# CHECK-LABEL: name: func1
+# CHECK-NOT: t2STRi12
+# CHECK-NOT: t2ADDri
+# CHECK: %r11 = t2LDRi12 %sp, 4092, 14, _ :: (load 4)
+# CHECK-NOT: t2LDRi12
+name: func1
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, offset: 4044, size: 4, alignment: 4, isImmutable: true,
+      isAliased: false }
+  - { id: 1, offset: -12, size: 4056, alignment: 4, isImmutable: false,
+      isAliased: false }
+body: |
+  bb.0:
+    %r0 = IMPLICIT_DEF
+    %r1 = IMPLICIT_DEF
+    %r2 = IMPLICIT_DEF
+    %r3 = IMPLICIT_DEF
+    %r4 = IMPLICIT_DEF
+    %r5 = IMPLICIT_DEF
+    %r6 = IMPLICIT_DEF
+    %r8 = IMPLICIT_DEF
+    %r9 = IMPLICIT_DEF
+    %r10 = IMPLICIT_DEF
+    %r11 = IMPLICIT_DEF
+    %r12 = IMPLICIT_DEF
+    %lr = IMPLICIT_DEF
+
+    %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4)
+
+    KILL %r0
+    KILL %r1
+    KILL %r2
+    KILL %r3
+    KILL %r4
+    KILL %r5
+    KILL %r6
+    KILL %r8
+    KILL %r9
+    KILL %r10
+    KILL %r11
+    KILL %r12
+    KILL %lr
+...
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index d874884dcb393..fb204debf6127 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -30,10 +30,9 @@ entry:
 define void @t1(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t1:
-; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #15
-; CHECK: adds r1, #15
+; CHECK: movs [[INC:r[0-9]+]], #15
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1], [[INC]]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]]
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
@@ -43,13 +42,15 @@ entry:
 define void @t2(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t2:
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: movs [[INC:r[0-9]+]], #32
+; CHECK: add.w   r3, r0, #16
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]]
 ; CHECK: movw [[REG2:r[0-9]+]], #16716
 ; CHECK: movt [[REG2:r[0-9]+]], #72
-; CHECK: str [[REG2]], [r0, #32]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
+; CHECK: str [[REG2]], [r0]
 ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
   ret void
 }
diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll
index f6f8d5623509e..b86874692acad 100644
--- a/test/CodeGen/ARM/memset-inline.ll
+++ b/test/CodeGen/ARM/memset-inline.ll
@@ -13,10 +13,10 @@ entry:
 define void @t2() nounwind ssp {
 entry:
 ; CHECK-LABEL: t2:
-; CHECK: add.w r1, r0, #10
 ; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
-; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: movs r1, #10
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1
+; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2]
   %buf = alloca [26 x i8], align 1
   %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
   call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index db9bc6ccdd0c8..0a7f7698fa88c 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 | FileCheck %s
 
 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_andi8:
-;CHECK: vand
+; CHECK-LABEL: v_andi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vand d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = and <8 x i8> %tmp1, %tmp2
@@ -10,8 +16,13 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_andi16:
-;CHECK: vand
+; CHECK-LABEL: v_andi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vand d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = and <4 x i16> %tmp1, %tmp2
@@ -19,8 +30,13 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_andi32:
-;CHECK: vand
+; CHECK-LABEL: v_andi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vand d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = and <2 x i32> %tmp1, %tmp2
@@ -28,8 +44,13 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_andi64:
-;CHECK: vand
+; CHECK-LABEL: v_andi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vand d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = and <1 x i64> %tmp1, %tmp2
@@ -37,8 +58,14 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_andQi8:
-;CHECK: vand
+; CHECK-LABEL: v_andQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vand q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = and <16 x i8> %tmp1, %tmp2
@@ -46,8 +73,14 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_andQi16:
-;CHECK: vand
+; CHECK-LABEL: v_andQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vand q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = and <8 x i16> %tmp1, %tmp2
@@ -55,8 +88,14 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_andQi32:
-;CHECK: vand
+; CHECK-LABEL: v_andQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vand q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = and <4 x i32> %tmp1, %tmp2
@@ -64,8 +103,14 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_andQi64:
-;CHECK: vand
+; CHECK-LABEL: v_andQi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vand q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = and <2 x i64> %tmp1, %tmp2
@@ -73,8 +118,13 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_bici8:
-;CHECK: vbic
+; CHECK-LABEL: v_bici8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vbic d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -83,8 +133,13 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_bici16:
-;CHECK: vbic
+; CHECK-LABEL: v_bici16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vbic d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -93,8 +148,13 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_bici32:
-;CHECK: vbic
+; CHECK-LABEL: v_bici32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vbic d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
@@ -103,8 +163,13 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_bici64:
-;CHECK: vbic
+; CHECK-LABEL: v_bici64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vbic d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
@@ -113,8 +178,14 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_bicQi8:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vbic q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -123,8 +194,14 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_bicQi16:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vbic q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -133,8 +210,14 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_bicQi32:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vbic q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
@@ -143,8 +226,14 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_bicQi64:
-;CHECK: vbic
+; CHECK-LABEL: v_bicQi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vbic q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
@@ -153,8 +242,13 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_eori8:
-;CHECK: veor
+; CHECK-LABEL: v_eori8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    veor d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = xor <8 x i8> %tmp1, %tmp2
@@ -162,8 +256,13 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_eori16:
-;CHECK: veor
+; CHECK-LABEL: v_eori16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    veor d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = xor <4 x i16> %tmp1, %tmp2
@@ -171,8 +270,13 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_eori32:
-;CHECK: veor
+; CHECK-LABEL: v_eori32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    veor d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = xor <2 x i32> %tmp1, %tmp2
@@ -180,8 +284,13 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_eori64:
-;CHECK: veor
+; CHECK-LABEL: v_eori64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    veor d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = xor <1 x i64> %tmp1, %tmp2
@@ -189,8 +298,14 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_eorQi8:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    veor q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = xor <16 x i8> %tmp1, %tmp2
@@ -198,8 +313,14 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_eorQi16:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    veor q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = xor <8 x i16> %tmp1, %tmp2
@@ -207,8 +328,14 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_eorQi32:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    veor q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = xor <4 x i32> %tmp1, %tmp2
@@ -216,8 +343,14 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_eorQi64:
-;CHECK: veor
+; CHECK-LABEL: v_eorQi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    veor q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = xor <2 x i64> %tmp1, %tmp2
@@ -225,72 +358,113 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: v_mvni8:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vmvn d16, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: v_mvni16:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vmvn d16, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: v_mvni32:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vmvn d16, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
-;CHECK-LABEL: v_mvni64:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvni64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vmvn d16, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi8:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vmvn q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi16:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vmvn q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi32:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vmvn q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: v_mvnQi64:
-;CHECK: vmvn
+; CHECK-LABEL: v_mvnQi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vmvn q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
 	ret <2 x i64> %tmp2
 }
 
 define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_orri8:
-;CHECK: vorr
+; CHECK-LABEL: v_orri8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorr d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = or <8 x i8> %tmp1, %tmp2
@@ -298,8 +472,13 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_orri16:
-;CHECK: vorr
+; CHECK-LABEL: v_orri16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorr d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = or <4 x i16> %tmp1, %tmp2
@@ -307,8 +486,13 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_orri32:
-;CHECK: vorr
+; CHECK-LABEL: v_orri32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorr d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = or <2 x i32> %tmp1, %tmp2
@@ -316,8 +500,13 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_orri64:
-;CHECK: vorr
+; CHECK-LABEL: v_orri64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorr d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = or <1 x i64> %tmp1, %tmp2
@@ -325,8 +514,14 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_orrQi8:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorr q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = or <16 x i8> %tmp1, %tmp2
@@ -334,8 +529,14 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_orrQi16:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorr q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = or <8 x i16> %tmp1, %tmp2
@@ -343,8 +544,14 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_orrQi32:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorr q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = or <4 x i32> %tmp1, %tmp2
@@ -352,8 +559,14 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_orrQi64:
-;CHECK: vorr
+; CHECK-LABEL: v_orrQi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorr q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = or <2 x i64> %tmp1, %tmp2
@@ -361,8 +574,13 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: v_orni8:
-;CHECK: vorn
+; CHECK-LABEL: v_orni8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorn d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -371,8 +589,13 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: v_orni16:
-;CHECK: vorn
+; CHECK-LABEL: v_orni16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorn d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -381,8 +604,13 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: v_orni32:
-;CHECK: vorn
+; CHECK-LABEL: v_orni32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorn d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
@@ -391,8 +619,13 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: v_orni64:
-;CHECK: vorn
+; CHECK-LABEL: v_orni64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorn d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
@@ -401,8 +634,14 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: v_ornQi8:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorn q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -411,8 +650,14 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: v_ornQi16:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorn q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -421,8 +666,14 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: v_ornQi32:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorn q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
@@ -431,8 +682,14 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: v_ornQi64:
-;CHECK: vorn
+; CHECK-LABEL: v_ornQi64:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vorn q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
@@ -441,8 +698,13 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vtsti8:
-;CHECK: vtst.8
+; CHECK-LABEL: vtsti8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtst.8 d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = and <8 x i8> %tmp1, %tmp2
@@ -452,8 +714,13 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vtsti16:
-;CHECK: vtst.16
+; CHECK-LABEL: vtsti16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtst.16 d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = and <4 x i16> %tmp1, %tmp2
@@ -463,8 +730,13 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: vtsti32:
-;CHECK: vtst.32
+; CHECK-LABEL: vtsti32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtst.32 d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = and <2 x i32> %tmp1, %tmp2
@@ -474,8 +746,14 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vtstQi8:
-;CHECK: vtst.8
+; CHECK-LABEL: vtstQi8:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vtst.8 q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = and <16 x i8> %tmp1, %tmp2
@@ -485,8 +763,14 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vtstQi16:
-;CHECK: vtst.16
+; CHECK-LABEL: vtstQi16:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vtst.16 q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = and <8 x i16> %tmp1, %tmp2
@@ -496,8 +780,14 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vtstQi32:
-;CHECK: vtst.32
+; CHECK-LABEL: vtstQi32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    vtst.32 q8, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = and <4 x i32> %tmp1, %tmp2
@@ -508,19 +798,24 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 
 define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
 ; CHECK-LABEL: v_orrimm:
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vorr
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vorr.i32 d16, #0x1000000
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
 	ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
-; CHECK: v_orrimmQ
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vorr
+; CHECK-LABEL: v_orrimmQ:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vorr.i32 q8, #0x1000000
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
 	ret <16 x i8> %tmp3
@@ -528,9 +823,11 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
 
 define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
 ; CHECK-LABEL: v_bicimm:
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vbic
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vbic.i32 d16, #0xff000000
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
 	ret <8 x i8> %tmp3
@@ -538,10 +835,29 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
 
 define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
 ; CHECK-LABEL: v_bicimmQ:
-; CHECK-NOT: vmov
-; CHECK-NOT: vmvn
-; CHECK: vbic
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vbic.i32 q8, #0xff000000
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
 	ret <16 x i8> %tmp3
 }
+
+define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: hidden_not_v4i32:
+; CHECK:       @ BB#0:
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vmov.i32 q8, #0x6
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbic q8, q8, q9
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
+  %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
+  %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
+  ret <4 x i32> %and
+}
+
diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll
index ed734723a86d3..4f7ebc938d4c7 100644
--- a/test/CodeGen/ARM/vector-load.ll
+++ b/test/CodeGen/ARM/vector-load.ll
@@ -253,11 +253,22 @@ define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
 }
 
 ; CHECK-LABEL: test_silly_load:
-; CHECK: ldr {{r[0-9]+}}, [r0, #24]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]!
-; CHECK: vldr d{{[0-9]+}}, [r0]
+; CHECK: vldr d{{[0-9]+}}, [r0, #16]
+; CHECK: movs r1, #24
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1
+; CHECK: ldr {{r[0-9]+}}, [r0]
 
 define void @test_silly_load(<28 x i8>* %addr) {
   load volatile <28 x i8>, <28 x i8>* %addr
   ret void
 }
+
+define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) {
+; CHECK-LABEL: test_vld1_immoffset:
+; CHECK: movs [[INC:r[0-9]+]], #32
+; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]]
+  %val = load <4 x i32>, <4 x i32>* %ptr.in
+  store <4 x i32> %val, <4 x i32>* %ptr.out
+  %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2
+  ret <4 x i32>* %next
+}
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
index 161bbf1d0fde8..e8c1a78a9113b 100644
--- a/test/CodeGen/ARM/vector-store.ll
+++ b/test/CodeGen/ARM/vector-store.ll
@@ -256,3 +256,13 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val)
         store <4 x i8>* %inc, <4 x i8>** %ptr
 	ret void
 }
+
+define <4 x i32>* @test_vst1_1reg(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) {
+; CHECK-LABEL: test_vst1_1reg:
+; CHECK: movs [[INC:r[0-9]+]], #32
+; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1], [[INC]]
+  %val = load <4 x i32>, <4 x i32>* %ptr.in
+  store <4 x i32> %val, <4 x i32>* %ptr.out
+  %next = getelementptr <4 x i32>, <4 x i32>* %ptr.out, i32 2
+  ret <4 x i32>* %next
+}
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index c6d5747f35093..71ca0f7915242 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -310,6 +310,23 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
 	ret <4 x i16> %tmp5
 }
 
+define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {
+;CHECK-LABEL: vld2dupi16_odd_update:
+;CHECK: mov [[INC:r[0-9]+]], #6
+;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]]
+	%A = load i16*, i16** %ptr
+        %A2 = bitcast i16* %A to i8*
+	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+	%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp5 = add <4 x i16> %tmp2, %tmp4
+	%tmp6 = getelementptr i16, i16* %A, i32 3
+	store i16* %tmp6, i16** %ptr
+	ret <4 x i16> %tmp5
+}
+
 define <2 x i32> @vld2dupi32(i8* %A) nounwind {
 ;CHECK-LABEL: vld2dupi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 2c14bc2d8f4eb..866641f3fbbd9 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -150,6 +150,22 @@ define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 	ret <2 x i32> %tmp5
 }
 
+define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: vld2lanei32_odd_update:
+;CHECK: mov [[INC:r[0-9]+]], #12
+;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]]
+	%A = load i32*, i32** %ptr
+	%tmp0 = bitcast i32* %A to i8*
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+	%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+	%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+	%tmp5 = add <2 x i32> %tmp3, %tmp4
+	%tmp6 = getelementptr i32, i32* %A, i32 3
+	store i32* %tmp6, i32** %ptr
+	ret <2 x i32> %tmp5
+}
+
 define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vld2lanef:
 ;CHECK: vld2.32
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
index e4dd572a41b4d..2e0718877e96d 100644
--- a/test/CodeGen/ARM/vtbl.ll
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - -verify-machineinstrs | FileCheck %s
 
 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>, <8 x i8> }