diff options
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir | 241 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll | 39 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-isel.ll | 56 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 156 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir | 164 | ||||
-rw-r--r-- | test/CodeGen/ARM/alloc-no-stack-realign.ll | 101 | ||||
-rw-r--r-- | test/CodeGen/ARM/build-attributes.ll | 461 | ||||
-rw-r--r-- | test/CodeGen/ARM/darwin-tls-preserved.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/ARM/divmod-hwdiv.ll | 37 | ||||
-rw-r--r-- | test/CodeGen/ARM/fpoffset_overflow.mir | 94 | ||||
-rw-r--r-- | test/CodeGen/ARM/memcpy-inline.ll | 17 | ||||
-rw-r--r-- | test/CodeGen/ARM/memset-inline.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/ARM/vbits.ll | 560 | ||||
-rw-r--r-- | test/CodeGen/ARM/vector-load.ll | 17 | ||||
-rw-r--r-- | test/CodeGen/ARM/vector-store.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/ARM/vlddup.ll | 17 | ||||
-rw-r--r-- | test/CodeGen/ARM/vldlane.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/ARM/vtbl.ll | 2 |
18 files changed, 1597 insertions, 421 deletions
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 66d9033a6d7cb..21c774133f896 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -12,6 +12,15 @@ define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } + define void @test_sub_s8() { ret void } + define void @test_sub_s16() { ret void } + define void @test_sub_s32() { ret void } + + define void @test_mul_s8() #1 { ret void } + define void @test_mul_s16() #1 { ret void } + define void @test_mul_s32() #1 { ret void } + define void @test_mulv5_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_load_f32() #0 { ret void } define void @test_load_f64() #0 { ret void } @@ -24,6 +33,7 @@ define void @test_soft_fp_double() #0 { ret void } attributes #0 = { "target-features"="+vfp2,-neonfp" } + attributes #1 = { "target-features"="+v6" } ... --- name: test_zext_s1 @@ -297,6 +307,237 @@ body: | ; CHECK: BX_RET 14, _, implicit %d0 ... --- +name: test_sub_s8 +# CHECK-LABEL: name: test_sub_s8 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +# CHECK-DAG: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s8) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s8) = G_SUB %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s8) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s16 +# CHECK-LABEL: name: test_sub_s16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +# CHECK-DAG: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s16) = G_SUB %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s16) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_SUB %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s8 +# CHECK-LABEL: name: test_mul_s8 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK-DAG: id: 0, class: gprnopc +# CHECK-DAG: id: 1, class: gprnopc +# CHECK-DAG: id: 2, class: gprnopc +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s8) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s8) = G_MUL %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s8) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s16 +# CHECK-LABEL: name: test_mul_s16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK-DAG: id: 0, class: gprnopc +# CHECK-DAG: id: 1, class: gprnopc +# CHECK-DAG: id: 2, class: gprnopc +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s16) = G_MUL %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s16) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gprnopc +# CHECK: id: 1, class: gprnopc +# CHECK: id: 2, class: gprnopc +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_MUL %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mulv5_s32 +# CHECK-LABEL: name: test_mulv5_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gprnopc +# CHECK: id: 1, class: gprnopc +# CHECK: id: 2, class: gprnopc +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_MUL %0, %1 + ; CHECK: early-clobber [[VREGRES:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack legalized: true diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index a7f5ec33bee3c..cf77ce352074d 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -35,6 +35,19 @@ entry: ret i8 %sum } +define i8 @test_sub_i8(i8 %x, i8 %y) { +; CHECK-LABEL: name: test_sub_i8 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s8) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s8) = COPY %r1 +; CHECK: [[RES:%[0-9]+]](s8) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: %r0 = COPY [[RES]](s8) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %res = sub i8 %x, %y + ret i8 %res +} + define signext i8 @test_return_sext_i8(i8 %x) { ; CHECK-LABEL: name: test_return_sext_i8 ; CHECK: liveins: %r0 @@ -59,6 +72,19 @@ entry: ret i16 %sum } +define i16 @test_sub_i16(i16 %x, i16 %y) { +; CHECK-LABEL: name: test_sub_i16 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s16) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s16) = COPY %r1 +; CHECK: [[RES:%[0-9]+]](s16) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: %r0 = COPY [[RES]](s16) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %res = sub i16 %x, %y + ret i16 %res +} + define zeroext i16 @test_return_zext_i16(i16 %x) { ; CHECK-LABEL: name: test_return_zext_i16 ; CHECK: liveins: %r0 @@ -83,6 +109,19 @@ entry: ret i32 %sum } +define i32 @test_sub_i32(i32 %x, i32 %y) { +; CHECK-LABEL: name: test_sub_i32 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[RES:%[0-9]+]](s32) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: %r0 = COPY [[RES]](s32) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %res = sub i32 %x, %y + ret i32 %res +} + define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { ; CHECK-LABEL: name: test_stack_args ; CHECK: fixedStack: diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 236dcbeb84c52..f3ca2915f306e 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel %s -o - | FileCheck %s +; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v6 -global-isel %s -o - | FileCheck %s define void @test_void_return() { ; CHECK-LABEL: test_void_return: @@ -67,6 +67,60 @@ entry: ret i32 %sum } +define i8 @test_sub_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_sub_i8: +; CHECK: sub r0, r0, r1 +; CHECK: bx lr +entry: + %sum = sub i8 %x, %y + ret i8 %sum +} + +define i16 @test_sub_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_sub_i16: +; CHECK: sub r0, r0, r1 +; CHECK: bx lr +entry: + %sum = sub i16 %x, %y + ret i16 %sum +} + +define i32 @test_sub_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_sub_i32: +; CHECK: sub r0, r0, r1 +; CHECK: bx lr +entry: + %sum = sub i32 %x, %y + ret i32 %sum +} + +define i8 @test_mul_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_mul_i8: +; CHECK: mul r0, r0, r1 +; CHECK: bx lr +entry: + %sum = mul i8 %x, %y + ret i8 %sum +} + +define i16 @test_mul_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_mul_i16: +; CHECK: mul r0, r0, r1 +; CHECK: bx lr +entry: + %sum = mul i16 %x, %y + ret i16 %sum +} + +define i32 @test_mul_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_mul_i32: +; CHECK: mul r0, r0, r1 +; CHECK: bx lr +entry: + %sum = mul i32 %x, %y + ret i32 %sum +} + define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { ; CHECK-LABEL: test_stack_args_i32: ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index cbff7e12fb77c..625d35acf17b9 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -7,6 +7,14 @@ define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } + define void @test_sub_s8() { ret void } + define void @test_sub_s16() { ret void } + define void @test_sub_s32() { ret void } + + define void @test_mul_s8() { ret void } + define void @test_mul_s16() { ret void } + define void @test_mul_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_legal_loads() #0 { ret void } define void @test_legal_stores() #0 { ret void } @@ -139,6 +147,154 @@ body: | ... --- +name: test_sub_s8 +# CHECK-LABEL: name: test_sub_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_SUB %0, %1 + ; G_SUB with s8 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s8) = G_SUB {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s16 +# CHECK-LABEL: name: test_sub_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_SUB %0, %1 + ; G_SUB with s16 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s16) = G_SUB {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_SUB %0, %1 + ; G_SUB with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s8 +# CHECK-LABEL: name: test_mul_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_MUL %0, %1 + ; G_MUL with s8 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s8) = G_MUL {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s16 +# CHECK-LABEL: name: test_mul_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_MUL %0, %1 + ; G_MUL with s16 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s16) = G_MUL {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_MUL %0, %1 + ; G_MUL with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack legalized: false diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index fbf8d81322f8f..e7935832f98a8 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -5,6 +5,14 @@ define void @test_add_s8() { ret void } define void @test_add_s1() { ret void } + define void @test_sub_s32() { ret void } + define void @test_sub_s16() { ret void } + define void @test_sub_s8() { ret void } + + define void @test_mul_s32() { ret void } + define void @test_mul_s16() { ret void } + define void @test_mul_s8() { ret void } + define void @test_loads() #0 { ret void } define void @test_stores() #0 { ret void } @@ -126,6 +134,162 @@ body: | ... --- +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_SUB %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s16 +# CHECK-LABEL: name: test_sub_s16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_SUB %0, %1 + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s8 +# CHECK-LABEL: name: test_sub_s8 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_SUB %0, %1 + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_MUL %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s16 +# CHECK-LABEL: name: test_mul_s16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_MUL %0, %1 + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s8 +# CHECK-LABEL: name: test_mul_s8 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_MUL %0, %1 + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 + +... +--- name: test_loads # CHECK-LABEL: name: test_loads legalized: true diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll index 0e077b3aee5a1..64c279b0f2187 100644 --- a/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -7,31 +7,32 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; CHECK-LABEL: test1 -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r1, #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test1: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -42,30 +43,32 @@ entry: define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test2: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index fc85a3a2e6834..699ef6e92a4ff 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -231,6 +231,11 @@ ; V6: .eabi_attribute 6, 6 ; V6: .eabi_attribute 8, 1 ;; We assume round-to-nearest by default (matches GCC) +; V6-NOT: .eabi_attribute 27 +; V6-NOT: .eabi_attribute 36 +; V6-NOT: .eabi_attribute 42 +; V6-NOT: .eabi_attribute 44 +; V6-NOT: .eabi_attribute 68 ; V6-NOT: .eabi_attribute 19 ;; The default choice made by llc is for a V6 CPU without an FPU. ;; This is not an interesting detail, but for such CPUs, the default intention is to use @@ -242,13 +247,8 @@ ; V6: .eabi_attribute 23, 3 ; V6: .eabi_attribute 24, 1 ; V6: .eabi_attribute 25, 1 -; V6-NOT: .eabi_attribute 27 ; V6-NOT: .eabi_attribute 28 -; V6-NOT: .eabi_attribute 36 ; V6: .eabi_attribute 38, 1 -; V6-NOT: .eabi_attribute 42 -; V6-NOT: .eabi_attribute 44 -; V6-NOT: .eabi_attribute 68 ; V6-FAST-NOT: .eabi_attribute 19 ;; Despite the V6 CPU having no FPU by default, we chose to flush to @@ -262,9 +262,14 @@ ;; We emit 6, 12 for both v6-M and v6S-M, technically this is incorrect for ;; V6-M, however we don't model the OS extension so this is fine. ; V6M: .eabi_attribute 6, 12 -; V6M-NOT: .eabi_attribute 7 +; V6M: .eabi_attribute 7, 77 ; V6M: .eabi_attribute 8, 0 ; V6M: .eabi_attribute 9, 1 +; V6M-NOT: .eabi_attribute 27 +; V6M-NOT: .eabi_attribute 36 +; V6M-NOT: .eabi_attribute 42 +; V6M-NOT: .eabi_attribute 44 +; V6M-NOT: .eabi_attribute 68 ; V6M-NOT: .eabi_attribute 19 ;; The default choice made by llc is for a V6M CPU without an FPU. ;; This is not an interesting detail, but for such CPUs, the default intention is to use @@ -276,13 +281,8 @@ ; V6M: .eabi_attribute 23, 3 ; V6M: .eabi_attribute 24, 1 ; V6M: .eabi_attribute 25, 1 -; V6M-NOT: .eabi_attribute 27 ; V6M-NOT: .eabi_attribute 28 -; V6M-NOT: .eabi_attribute 36 ; V6M: .eabi_attribute 38, 1 -; V6M-NOT: .eabi_attribute 42 -; V6M-NOT: .eabi_attribute 44 -; V6M-NOT: .eabi_attribute 68 ; V6M-FAST-NOT: .eabi_attribute 19 ;; Despite the V6M CPU having no FPU by default, we chose to flush to @@ -298,6 +298,11 @@ ; ARM1156T2F-S: .eabi_attribute 8, 1 ; ARM1156T2F-S: .eabi_attribute 9, 2 ; ARM1156T2F-S: .fpu vfpv2 +; ARM1156T2F-S-NOT: .eabi_attribute 27 +; ARM1156T2F-S-NOT: .eabi_attribute 36 +; ARM1156T2F-S-NOT: .eabi_attribute 42 +; ARM1156T2F-S-NOT: .eabi_attribute 44 +; ARM1156T2F-S-NOT: .eabi_attribute 68 ; ARM1156T2F-S-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; ARM1156T2F-S: .eabi_attribute 20, 1 @@ -306,13 +311,8 @@ ; ARM1156T2F-S: .eabi_attribute 23, 3 ; ARM1156T2F-S: .eabi_attribute 24, 1 ; ARM1156T2F-S: .eabi_attribute 25, 1 -; ARM1156T2F-S-NOT: .eabi_attribute 27 ; ARM1156T2F-S-NOT: .eabi_attribute 28 -; ARM1156T2F-S-NOT: .eabi_attribute 36 ; ARM1156T2F-S: .eabi_attribute 38, 1 -; ARM1156T2F-S-NOT: .eabi_attribute 42 -; ARM1156T2F-S-NOT: .eabi_attribute 44 -; ARM1156T2F-S-NOT: .eabi_attribute 68 ; ARM1156T2F-S-FAST-NOT: .eabi_attribute 19 ;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally @@ -327,6 +327,11 @@ ; V7M: .eabi_attribute 7, 77 ; V7M: .eabi_attribute 8, 0 ; V7M: .eabi_attribute 9, 2 +; V7M-NOT: .eabi_attribute 27 +; V7M-NOT: .eabi_attribute 36 +; V7M-NOT: .eabi_attribute 42 +; V7M-NOT: .eabi_attribute 44 +; V7M-NOT: .eabi_attribute 68 ; V7M-NOT: .eabi_attribute 19 ;; The default choice made by llc is for a V7M CPU without an FPU. ;; This is not an interesting detail, but for such CPUs, the default intention is to use @@ -338,13 +343,8 @@ ; V7M: .eabi_attribute 23, 3 ; V7M: .eabi_attribute 24, 1 ; V7M: .eabi_attribute 25, 1 -; V7M-NOT: .eabi_attribute 27 ; V7M-NOT: .eabi_attribute 28 -; V7M-NOT: .eabi_attribute 36 ; V7M: .eabi_attribute 38, 1 -; V7M-NOT: .eabi_attribute 42 -; V7M-NOT: .eabi_attribute 44 -; V7M-NOT: .eabi_attribute 68 ; V7M-FAST-NOT: .eabi_attribute 19 ;; Despite the V7M CPU having no FPU by default, we chose to flush @@ -357,6 +357,11 @@ ; V7: .syntax unified ; V7: .eabi_attribute 6, 10 +; V7-NOT: .eabi_attribute 27 +; V7-NOT: .eabi_attribute 36 +; V7-NOT: .eabi_attribute 42 +; V7-NOT: .eabi_attribute 44 +; V7-NOT: .eabi_attribute 68 ; V7-NOT: .eabi_attribute 19 ;; In safe-maths mode we default to an IEEE 754 compliant choice. ; V7: .eabi_attribute 20, 1 @@ -365,13 +370,8 @@ ; V7: .eabi_attribute 23, 3 ; V7: .eabi_attribute 24, 1 ; V7: .eabi_attribute 25, 1 -; V7-NOT: .eabi_attribute 27 ; V7-NOT: .eabi_attribute 28 -; V7-NOT: .eabi_attribute 36 ; V7: .eabi_attribute 38, 1 -; V7-NOT: .eabi_attribute 42 -; V7-NOT: .eabi_attribute 44 -; V7-NOT: .eabi_attribute 68 ; V7-FAST-NOT: .eabi_attribute 19 ;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes @@ -386,6 +386,9 @@ ; V7VE: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile ; V7VE: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use ; V7VE: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use +; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use +; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use +; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use ; V7VE: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use ; V7VE: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal ; V7VE: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions @@ -393,19 +396,16 @@ ; V7VE: .eabi_attribute 24, 1 @ Tag_ABI_align_needed ; V7VE: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved ; V7VE: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format -; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use -; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use -; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use ; V8: .syntax unified ; V8: .eabi_attribute 67, "2.09" ; V8: .eabi_attribute 6, 14 +; V8-NOT: .eabi_attribute 44 ; V8-NOT: .eabi_attribute 19 ; V8: .eabi_attribute 20, 1 ; V8: .eabi_attribute 21, 1 ; V8-NOT: .eabi_attribute 22 ; V8: .eabi_attribute 23, 3 -; V8-NOT: .eabi_attribute 44 ; V8-FAST-NOT: .eabi_attribute 19 ;; The default does have an FPU, and for V8-A, it flushes preserving sign. @@ -496,6 +496,30 @@ ; CORTEX-A7-FPUV4: .fpu vfpv4 ; CORTEX-A7-CHECK-NOT: .eabi_attribute 19 + +; Tag_FP_HP_extension +; CORTEX-A7-CHECK: .eabi_attribute 36, 1 +; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36 +; CORTEX-A7-FPUV4: .eabi_attribute 36, 1 + +; Tag_MPextension_use +; CORTEX-A7-CHECK: .eabi_attribute 42, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 42, 1 + +; Tag_DIV_use +; CORTEX-A7-CHECK: .eabi_attribute 44, 2 +; CORTEX-A7-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A7-FPUV4: .eabi_attribute 44, 2 + +; Tag_DSP_extension +; CORTEX-A7-CHECK-NOT: .eabi_attribute 46 + +; Tag_Virtualization_use +; CORTEX-A7-CHECK: .eabi_attribute 68, 3 +; CORTEX-A7-NOFPU: .eabi_attribute 68, 3 +; CORTEX-A7-FPUV4: .eabi_attribute 68, 3 + ; Tag_ABI_FP_denormal ;; We default to IEEE 754 compliance ; CORTEX-A7-CHECK: .eabi_attribute 20, 1 @@ -535,40 +559,20 @@ ; CORTEX-A7-NOFPU: .eabi_attribute 25, 1 ; CORTEX-A7-FPUV4: .eabi_attribute 25, 1 -; Tag_FP_HP_extension -; CORTEX-A7-CHECK: .eabi_attribute 36, 1 -; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36 -; CORTEX-A7-FPUV4: .eabi_attribute 36, 1 - ; Tag_FP_16bit_format ; CORTEX-A7-CHECK: .eabi_attribute 38, 1 ; CORTEX-A7-NOFPU: .eabi_attribute 38, 1 ; CORTEX-A7-FPUV4: .eabi_attribute 38, 1 -; Tag_MPextension_use -; CORTEX-A7-CHECK: .eabi_attribute 42, 1 -; CORTEX-A7-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A7-FPUV4: .eabi_attribute 42, 1 - -; Tag_DIV_use -; CORTEX-A7-CHECK: .eabi_attribute 44, 2 -; CORTEX-A7-NOFPU: .eabi_attribute 44, 2 -; CORTEX-A7-FPUV4: .eabi_attribute 44, 2 - -; Tag_DSP_extension -; CORTEX-A7-CHECK-NOT: .eabi_attribute 46 - -; Tag_Virtualization_use -; CORTEX-A7-CHECK: .eabi_attribute 68, 3 -; CORTEX-A7-NOFPU: .eabi_attribute 68, 3 -; CORTEX-A7-FPUV4: .eabi_attribute 68, 3 - ; CORTEX-A5-DEFAULT: .cpu cortex-a5 ; CORTEX-A5-DEFAULT: .eabi_attribute 6, 10 ; CORTEX-A5-DEFAULT: .eabi_attribute 7, 65 ; CORTEX-A5-DEFAULT: .eabi_attribute 8, 1 ; CORTEX-A5-DEFAULT: .eabi_attribute 9, 2 ; CORTEX-A5-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A5-DEFAULT-NOT: .eabi_attribute 44 +; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1 ; CORTEX-A5-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A5-DEFAULT: .eabi_attribute 20, 1 @@ -577,9 +581,6 @@ ; CORTEX-A5-DEFAULT: .eabi_attribute 23, 3 ; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1 -; CORTEX-A5-DEFAULT-NOT: .eabi_attribute 44 -; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1 ; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 19 ;; The A5 defaults to a VFPv4 FPU, so it flushed preserving the sign when -ffast-math @@ -595,6 +596,8 @@ ; CORTEX-A5-NONEON: .eabi_attribute 8, 1 ; CORTEX-A5-NONEON: .eabi_attribute 9, 2 ; CORTEX-A5-NONEON: .fpu vfpv4-d16 +; CORTEX-A5-NONEON: .eabi_attribute 42, 1 +; CORTEX-A5-NONEON: .eabi_attribute 68, 1 ;; We default to IEEE 754 compliance ; CORTEX-A5-NONEON: .eabi_attribute 20, 1 ; CORTEX-A5-NONEON: .eabi_attribute 21, 1 @@ -602,8 +605,6 @@ ; CORTEX-A5-NONEON: .eabi_attribute 23, 3 ; CORTEX-A5-NONEON: .eabi_attribute 24, 1 ; CORTEX-A5-NONEON: .eabi_attribute 25, 1 -; CORTEX-A5-NONEON: .eabi_attribute 42, 1 -; CORTEX-A5-NONEON: .eabi_attribute 68, 1 ; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 19 ;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math @@ -619,6 +620,8 @@ ; CORTEX-A5-NOFPU: .eabi_attribute 8, 1 ; CORTEX-A5-NOFPU: .eabi_attribute 9, 2 ; CORTEX-A5-NOFPU-NOT: .fpu +; CORTEX-A5-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 68, 1 ; CORTEX-A5-NOFPU-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A5-NOFPU: .eabi_attribute 20, 1 @@ -627,8 +630,6 @@ ; CORTEX-A5-NOFPU: .eabi_attribute 23, 3 ; CORTEX-A5-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A5-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A5-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A5-NOFPU: .eabi_attribute 68, 1 ; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -645,6 +646,11 @@ ; CORTEX-A8-SOFT: .eabi_attribute 8, 1 ; CORTEX-A8-SOFT: .eabi_attribute 9, 2 ; CORTEX-A8-SOFT: .fpu neon +; CORTEX-A8-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A8-SOFT-NOT: .eabi_attribute 36, 1 +; CORTEX-A8-SOFT-NOT: .eabi_attribute 42, 1 +; CORTEX-A8-SOFT-NOT: .eabi_attribute 44 +; CORTEX-A8-SOFT: .eabi_attribute 68, 1 ; CORTEX-A8-SOFT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A8-SOFT: .eabi_attribute 20, 1 @@ -653,13 +659,8 @@ ; CORTEX-A8-SOFT: .eabi_attribute 23, 3 ; CORTEX-A8-SOFT: .eabi_attribute 24, 1 ; CORTEX-A8-SOFT: .eabi_attribute 25, 1 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 27 ; CORTEX-A8-SOFT-NOT: .eabi_attribute 28 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 36, 1 ; CORTEX-A8-SOFT: .eabi_attribute 38, 1 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 42, 1 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 44 -; CORTEX-A8-SOFT: .eabi_attribute 68, 1 ; CORTEX-A9-SOFT: .cpu cortex-a9 ; CORTEX-A9-SOFT: .eabi_attribute 6, 10 @@ -667,6 +668,11 @@ ; CORTEX-A9-SOFT: .eabi_attribute 8, 1 ; CORTEX-A9-SOFT: .eabi_attribute 9, 2 ; CORTEX-A9-SOFT: .fpu neon +; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A9-SOFT: .eabi_attribute 36, 1 +; CORTEX-A9-SOFT: .eabi_attribute 42, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 44 +; CORTEX-A9-SOFT: .eabi_attribute 68, 1 ; CORTEX-A9-SOFT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A9-SOFT: .eabi_attribute 20, 1 @@ -675,13 +681,8 @@ ; CORTEX-A9-SOFT: .eabi_attribute 23, 3 ; CORTEX-A9-SOFT: .eabi_attribute 24, 1 ; CORTEX-A9-SOFT: .eabi_attribute 25, 1 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 ; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 -; CORTEX-A9-SOFT: .eabi_attribute 36, 1 ; CORTEX-A9-SOFT: .eabi_attribute 38, 1 -; CORTEX-A9-SOFT: .eabi_attribute 42, 1 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 44 -; CORTEX-A9-SOFT: .eabi_attribute 68, 1 ; CORTEX-A8-SOFT-FAST-NOT: .eabi_attribute 19 ; CORTEX-A9-SOFT-FAST-NOT: .eabi_attribute 19 @@ -699,6 +700,10 @@ ; CORTEX-A8-HARD: .eabi_attribute 8, 1 ; CORTEX-A8-HARD: .eabi_attribute 9, 2 ; CORTEX-A8-HARD: .fpu neon +; CORTEX-A8-HARD-NOT: .eabi_attribute 27 +; CORTEX-A8-HARD-NOT: .eabi_attribute 36, 1 +; CORTEX-A8-HARD-NOT: .eabi_attribute 42, 1 +; CORTEX-A8-HARD: .eabi_attribute 68, 1 ; CORTEX-A8-HARD-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A8-HARD: .eabi_attribute 20, 1 @@ -707,12 +712,8 @@ ; CORTEX-A8-HARD: .eabi_attribute 23, 3 ; CORTEX-A8-HARD: .eabi_attribute 24, 1 ; CORTEX-A8-HARD: .eabi_attribute 25, 1 -; CORTEX-A8-HARD-NOT: .eabi_attribute 27 ; CORTEX-A8-HARD: .eabi_attribute 28, 1 -; CORTEX-A8-HARD-NOT: .eabi_attribute 36, 1 ; CORTEX-A8-HARD: .eabi_attribute 38, 1 -; CORTEX-A8-HARD-NOT: .eabi_attribute 42, 1 -; CORTEX-A8-HARD: .eabi_attribute 68, 1 @@ -722,6 +723,10 @@ ; CORTEX-A9-HARD: .eabi_attribute 8, 1 ; CORTEX-A9-HARD: .eabi_attribute 9, 2 ; CORTEX-A9-HARD: .fpu neon +; CORTEX-A9-HARD-NOT: .eabi_attribute 27 +; CORTEX-A9-HARD: .eabi_attribute 36, 1 +; CORTEX-A9-HARD: .eabi_attribute 42, 1 +; CORTEX-A9-HARD: .eabi_attribute 68, 1 ; CORTEX-A9-HARD-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A9-HARD: .eabi_attribute 20, 1 @@ -730,12 +735,8 @@ ; CORTEX-A9-HARD: .eabi_attribute 23, 3 ; CORTEX-A9-HARD: .eabi_attribute 24, 1 ; CORTEX-A9-HARD: .eabi_attribute 25, 1 -; CORTEX-A9-HARD-NOT: .eabi_attribute 27 ; CORTEX-A9-HARD: .eabi_attribute 28, 1 -; CORTEX-A9-HARD: .eabi_attribute 36, 1 ; CORTEX-A9-HARD: .eabi_attribute 38, 1 -; CORTEX-A9-HARD: .eabi_attribute 42, 1 -; CORTEX-A9-HARD: .eabi_attribute 68, 1 ; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 19 ;; The A8 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -759,6 +760,9 @@ ; CORTEX-A12-DEFAULT: .eabi_attribute 8, 1 ; CORTEX-A12-DEFAULT: .eabi_attribute 9, 2 ; CORTEX-A12-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2 +; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A12-DEFAULT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A12-DEFAULT: .eabi_attribute 20, 1 @@ -767,9 +771,6 @@ ; CORTEX-A12-DEFAULT: .eabi_attribute 23, 3 ; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1 -; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2 -; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A12-DEFAULT-FAST-NOT: .eabi_attribute 19 ;; The A12 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -785,6 +786,9 @@ ; CORTEX-A12-NOFPU: .eabi_attribute 8, 1 ; CORTEX-A12-NOFPU: .eabi_attribute 9, 2 ; CORTEX-A12-NOFPU-NOT: .fpu +; CORTEX-A12-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A12-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A12-NOFPU-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A12-NOFPU: .eabi_attribute 20, 1 @@ -793,9 +797,6 @@ ; CORTEX-A12-NOFPU: .eabi_attribute 23, 3 ; CORTEX-A12-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A12-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A12-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A12-NOFPU: .eabi_attribute 44, 2 -; CORTEX-A12-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -812,6 +813,11 @@ ; CORTEX-A15: .eabi_attribute 8, 1 ; CORTEX-A15: .eabi_attribute 9, 2 ; CORTEX-A15: .fpu neon-vfpv4 +; CORTEX-A15-NOT: .eabi_attribute 27 +; CORTEX-A15: .eabi_attribute 36, 1 +; CORTEX-A15: .eabi_attribute 42, 1 +; CORTEX-A15: .eabi_attribute 44, 2 +; CORTEX-A15: .eabi_attribute 68, 3 ; CORTEX-A15-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A15: .eabi_attribute 20, 1 @@ -820,13 +826,8 @@ ; CORTEX-A15: .eabi_attribute 23, 3 ; CORTEX-A15: .eabi_attribute 24, 1 ; CORTEX-A15: .eabi_attribute 25, 1 -; CORTEX-A15-NOT: .eabi_attribute 27 ; CORTEX-A15-NOT: .eabi_attribute 28 -; CORTEX-A15: .eabi_attribute 36, 1 ; CORTEX-A15: .eabi_attribute 38, 1 -; CORTEX-A15: .eabi_attribute 42, 1 -; CORTEX-A15: .eabi_attribute 44, 2 -; CORTEX-A15: .eabi_attribute 68, 3 ; CORTEX-A15-FAST-NOT: .eabi_attribute 19 ;; The A15 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -842,6 +843,9 @@ ; CORTEX-A17-DEFAULT: .eabi_attribute 8, 1 ; CORTEX-A17-DEFAULT: .eabi_attribute 9, 2 ; CORTEX-A17-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A17-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A17-DEFAULT: .eabi_attribute 44, 2 +; CORTEX-A17-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A17-DEFAULT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A17-DEFAULT: .eabi_attribute 20, 1 @@ -850,9 +854,6 @@ ; CORTEX-A17-DEFAULT: .eabi_attribute 23, 3 ; CORTEX-A17-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A17-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A17-DEFAULT: .eabi_attribute 42, 1 -; CORTEX-A17-DEFAULT: .eabi_attribute 44, 2 -; CORTEX-A17-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A17-FAST-NOT: .eabi_attribute 19 ;; The A17 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -868,6 +869,9 @@ ; CORTEX-A17-NOFPU: .eabi_attribute 8, 1 ; CORTEX-A17-NOFPU: .eabi_attribute 9, 2 ; CORTEX-A17-NOFPU-NOT: .fpu +; CORTEX-A17-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A17-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A17-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A17-NOFPU: .eabi_attribute 20, 1 @@ -876,9 +880,6 @@ ; CORTEX-A17-NOFPU: .eabi_attribute 23, 3 ; CORTEX-A17-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A17-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A17-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A17-NOFPU: .eabi_attribute 44, 2 -; CORTEX-A17-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -897,25 +898,25 @@ ; CORTEX-M0: .cpu cortex-m0 ; CORTEX-M0: .eabi_attribute 6, 12 -; CORTEX-M0-NOT: .eabi_attribute 7 +; CORTEX-M0: .eabi_attribute 7, 77 ; CORTEX-M0: .eabi_attribute 8, 0 ; CORTEX-M0: .eabi_attribute 9, 1 +; CORTEX-M0-NOT: .eabi_attribute 27 +; CORTEX-M0-NOT: .eabi_attribute 36 +; CORTEX-M0: .eabi_attribute 34, 0 +; CORTEX-M0-NOT: .eabi_attribute 42 +; CORTEX-M0-NOT: .eabi_attribute 44 +; CORTEX-M0-NOT: .eabi_attribute 68 ; CORTEX-M0-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M0: .eabi_attribute 20, 1 ; CORTEX-M0: .eabi_attribute 21, 1 ; CORTEX-M0-NOT: .eabi_attribute 22 ; CORTEX-M0: .eabi_attribute 23, 3 -; CORTEX-M0: .eabi_attribute 34, 0 ; CORTEX-M0: .eabi_attribute 24, 1 ; CORTEX-M0: .eabi_attribute 25, 1 -; CORTEX-M0-NOT: .eabi_attribute 27 ; CORTEX-M0-NOT: .eabi_attribute 28 -; CORTEX-M0-NOT: .eabi_attribute 36 ; CORTEX-M0: .eabi_attribute 38, 1 -; CORTEX-M0-NOT: .eabi_attribute 42 -; CORTEX-M0-NOT: .eabi_attribute 44 -; CORTEX-M0-NOT: .eabi_attribute 68 ; CORTEX-M0-FAST-NOT: .eabi_attribute 19 ;; Despite the M0 CPU having no FPU in this scenario, we chose to @@ -930,9 +931,14 @@ ; CORTEX-M0PLUS: .cpu cortex-m0plus ; CORTEX-M0PLUS: .eabi_attribute 6, 12 -; CORTEX-M0PLUS-NOT: .eabi_attribute 7 +; CORTEX-M0PLUS: .eabi_attribute 7, 77 ; CORTEX-M0PLUS: .eabi_attribute 8, 0 ; CORTEX-M0PLUS: .eabi_attribute 9, 1 +; CORTEX-M0PLUS-NOT: .eabi_attribute 27 +; CORTEX-M0PLUS-NOT: .eabi_attribute 36 +; CORTEX-M0PLUS-NOT: .eabi_attribute 42 +; CORTEX-M0PLUS-NOT: .eabi_attribute 44 +; CORTEX-M0PLUS-NOT: .eabi_attribute 68 ; CORTEX-M0PLUS-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M0PLUS: .eabi_attribute 20, 1 @@ -941,13 +947,8 @@ ; CORTEX-M0PLUS: .eabi_attribute 23, 3 ; CORTEX-M0PLUS: .eabi_attribute 24, 1 ; CORTEX-M0PLUS: .eabi_attribute 25, 1 -; CORTEX-M0PLUS-NOT: .eabi_attribute 27 ; CORTEX-M0PLUS-NOT: .eabi_attribute 28 -; CORTEX-M0PLUS-NOT: .eabi_attribute 36 ; CORTEX-M0PLUS: .eabi_attribute 38, 1 -; CORTEX-M0PLUS-NOT: .eabi_attribute 42 -; CORTEX-M0PLUS-NOT: .eabi_attribute 44 -; CORTEX-M0PLUS-NOT: .eabi_attribute 68 ; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 19 ;; Despite the M0+ CPU having no FPU in this scenario, we chose to @@ -962,9 +963,14 @@ ; CORTEX-M1: .cpu cortex-m1 ; CORTEX-M1: .eabi_attribute 6, 12 -; CORTEX-M1-NOT: .eabi_attribute 7 +; CORTEX-M1: .eabi_attribute 7, 77 ; CORTEX-M1: .eabi_attribute 8, 0 ; CORTEX-M1: .eabi_attribute 9, 1 +; CORTEX-M1-NOT: .eabi_attribute 27 +; CORTEX-M1-NOT: .eabi_attribute 36 +; CORTEX-M1-NOT: .eabi_attribute 42 +; CORTEX-M1-NOT: .eabi_attribute 44 +; CORTEX-M1-NOT: .eabi_attribute 68 ; CORTEX-M1-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M1: .eabi_attribute 20, 1 @@ -973,13 +979,8 @@ ; CORTEX-M1: .eabi_attribute 23, 3 ; CORTEX-M1: .eabi_attribute 24, 1 ; CORTEX-M1: .eabi_attribute 25, 1 -; CORTEX-M1-NOT: .eabi_attribute 27 ; CORTEX-M1-NOT: .eabi_attribute 28 -; CORTEX-M1-NOT: .eabi_attribute 36 ; CORTEX-M1: .eabi_attribute 38, 1 -; CORTEX-M1-NOT: .eabi_attribute 42 -; CORTEX-M1-NOT: .eabi_attribute 44 -; CORTEX-M1-NOT: .eabi_attribute 68 ; CORTEX-M1-FAST-NOT: .eabi_attribute 19 ;; Despite the M1 CPU having no FPU in this scenario, we chose to @@ -994,9 +995,13 @@ ; SC000: .cpu sc000 ; SC000: .eabi_attribute 6, 12 -; SC000-NOT: .eabi_attribute 7 +; SC000: .eabi_attribute 7, 77 ; SC000: .eabi_attribute 8, 0 ; SC000: .eabi_attribute 9, 1 +; SC000-NOT: .eabi_attribute 27 +; SC000-NOT: .eabi_attribute 42 +; SC000-NOT: .eabi_attribute 44 +; SC000-NOT: .eabi_attribute 68 ; SC000-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; SC000: .eabi_attribute 20, 1 @@ -1005,13 +1010,8 @@ ; SC000: .eabi_attribute 23, 3 ; SC000: .eabi_attribute 24, 1 ; SC000: .eabi_attribute 25, 1 -; SC000-NOT: .eabi_attribute 27 ; SC000-NOT: .eabi_attribute 28 -; SC000-NOT: .eabi_attribute 36 ; SC000: .eabi_attribute 38, 1 -; SC000-NOT: .eabi_attribute 42 -; SC000-NOT: .eabi_attribute 44 -; SC000-NOT: .eabi_attribute 68 ; SC000-FAST-NOT: .eabi_attribute 19 ;; Despite the SC000 CPU having no FPU in this scenario, we chose to @@ -1029,6 +1029,11 @@ ; CORTEX-M3: .eabi_attribute 7, 77 ; CORTEX-M3: .eabi_attribute 8, 0 ; CORTEX-M3: .eabi_attribute 9, 2 +; CORTEX-M3-NOT: .eabi_attribute 27 +; CORTEX-M3-NOT: .eabi_attribute 36 +; CORTEX-M3-NOT: .eabi_attribute 42 +; CORTEX-M3-NOT: .eabi_attribute 44 +; CORTEX-M3-NOT: .eabi_attribute 68 ; CORTEX-M3-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M3: .eabi_attribute 20, 1 @@ -1037,13 +1042,8 @@ ; CORTEX-M3: .eabi_attribute 23, 3 ; CORTEX-M3: .eabi_attribute 24, 1 ; CORTEX-M3: .eabi_attribute 25, 1 -; CORTEX-M3-NOT: .eabi_attribute 27 ; CORTEX-M3-NOT: .eabi_attribute 28 -; CORTEX-M3-NOT: .eabi_attribute 36 ; CORTEX-M3: .eabi_attribute 38, 1 -; CORTEX-M3-NOT: .eabi_attribute 42 -; CORTEX-M3-NOT: .eabi_attribute 44 -; CORTEX-M3-NOT: .eabi_attribute 68 ; CORTEX-M3-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -1059,6 +1059,11 @@ ; SC300: .eabi_attribute 7, 77 ; SC300: .eabi_attribute 8, 0 ; SC300: .eabi_attribute 9, 2 +; SC300-NOT: .eabi_attribute 27 +; SC300-NOT: .eabi_attribute 36 +; SC300-NOT: .eabi_attribute 42 +; SC300-NOT: .eabi_attribute 44 +; SC300-NOT: .eabi_attribute 68 ; SC300-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; SC300: .eabi_attribute 20, 1 @@ -1067,13 +1072,8 @@ ; SC300: .eabi_attribute 23, 3 ; SC300: .eabi_attribute 24, 1 ; SC300: .eabi_attribute 25, 1 -; SC300-NOT: .eabi_attribute 27 ; SC300-NOT: .eabi_attribute 28 -; SC300-NOT: .eabi_attribute 36 ; SC300: .eabi_attribute 38, 1 -; SC300-NOT: .eabi_attribute 42 -; SC300-NOT: .eabi_attribute 44 -; SC300-NOT: .eabi_attribute 68 ; SC300-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -1090,6 +1090,11 @@ ; CORTEX-M4-SOFT: .eabi_attribute 8, 0 ; CORTEX-M4-SOFT: .eabi_attribute 9, 2 ; CORTEX-M4-SOFT: .fpu fpv4-sp-d16 +; CORTEX-M4-SOFT: .eabi_attribute 27, 1 +; CORTEX-M4-SOFT: .eabi_attribute 36, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 44 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 ; CORTEX-M4-SOFT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M4-SOFT: .eabi_attribute 20, 1 @@ -1098,13 +1103,8 @@ ; CORTEX-M4-SOFT: .eabi_attribute 23, 3 ; CORTEX-M4-SOFT: .eabi_attribute 24, 1 ; CORTEX-M4-SOFT: .eabi_attribute 25, 1 -; CORTEX-M4-SOFT: .eabi_attribute 27, 1 ; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 -; CORTEX-M4-SOFT: .eabi_attribute 36, 1 ; CORTEX-M4-SOFT: .eabi_attribute 38, 1 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 44 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 ; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 19 ;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when @@ -1120,6 +1120,11 @@ ; CORTEX-M4-HARD: .eabi_attribute 8, 0 ; CORTEX-M4-HARD: .eabi_attribute 9, 2 ; CORTEX-M4-HARD: .fpu fpv4-sp-d16 +; CORTEX-M4-HARD: .eabi_attribute 27, 1 +; CORTEX-M4-HARD: .eabi_attribute 36, 1 +; CORTEX-M4-HARD-NOT: .eabi_attribute 42 +; CORTEX-M4-HARD-NOT: .eabi_attribute 44 +; CORTEX-M4-HARD-NOT: .eabi_attribute 68 ; CORTEX-M4-HARD-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M4-HARD: .eabi_attribute 20, 1 @@ -1128,13 +1133,8 @@ ; CORTEX-M4-HARD: .eabi_attribute 23, 3 ; CORTEX-M4-HARD: .eabi_attribute 24, 1 ; CORTEX-M4-HARD: .eabi_attribute 25, 1 -; CORTEX-M4-HARD: .eabi_attribute 27, 1 ; CORTEX-M4-HARD: .eabi_attribute 28, 1 -; CORTEX-M4-HARD: .eabi_attribute 36, 1 ; CORTEX-M4-HARD: .eabi_attribute 38, 1 -; CORTEX-M4-HARD-NOT: .eabi_attribute 42 -; CORTEX-M4-HARD-NOT: .eabi_attribute 44 -; CORTEX-M4-HARD-NOT: .eabi_attribute 68 ; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 19 ;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when @@ -1152,6 +1152,11 @@ ; CORTEX-M7-SOFT-NOT: .fpu ; CORTEX-M7-SINGLE: .fpu fpv5-sp-d16 ; CORTEX-M7-DOUBLE: .fpu fpv5-d16 +; CORTEX-M7-SOFT-NOT: .eabi_attribute 27 +; CORTEX-M7-SINGLE: .eabi_attribute 27, 1 +; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27 +; CORTEX-M7: .eabi_attribute 36, 1 +; CORTEX-M7-NOT: .eabi_attribute 44 ; CORTEX-M7: .eabi_attribute 17, 1 ; CORTEX-M7-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance @@ -1161,12 +1166,7 @@ ; CORTEX-M7: .eabi_attribute 23, 3 ; CORTEX-M7: .eabi_attribute 24, 1 ; CORTEX-M7: .eabi_attribute 25, 1 -; CORTEX-M7-SOFT-NOT: .eabi_attribute 27 -; CORTEX-M7-SINGLE: .eabi_attribute 27, 1 -; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27 -; CORTEX-M7: .eabi_attribute 36, 1 ; CORTEX-M7: .eabi_attribute 38, 1 -; CORTEX-M7-NOT: .eabi_attribute 44 ; CORTEX-M7: .eabi_attribute 14, 0 ; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 19 @@ -1186,6 +1186,10 @@ ; CORTEX-R4: .eabi_attribute 8, 1 ; CORTEX-R4: .eabi_attribute 9, 2 ; CORTEX-R4-NOT: .fpu vfpv3-d16 +; CORTEX-R4-NOT: .eabi_attribute 36 +; CORTEX-R4-NOT: .eabi_attribute 42 +; CORTEX-R4-NOT: .eabi_attribute 44 +; CORTEX-R4-NOT: .eabi_attribute 68 ; CORTEX-R4-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R4: .eabi_attribute 20, 1 @@ -1195,11 +1199,7 @@ ; CORTEX-R4: .eabi_attribute 24, 1 ; CORTEX-R4: .eabi_attribute 25, 1 ; CORTEX-R4-NOT: .eabi_attribute 28 -; CORTEX-R4-NOT: .eabi_attribute 36 ; CORTEX-R4: .eabi_attribute 38, 1 -; CORTEX-R4-NOT: .eabi_attribute 42 -; CORTEX-R4-NOT: .eabi_attribute 44 -; CORTEX-R4-NOT: .eabi_attribute 68 ; CORTEX-R4F: .cpu cortex-r4f ; CORTEX-R4F: .eabi_attribute 6, 10 @@ -1207,6 +1207,11 @@ ; CORTEX-R4F: .eabi_attribute 8, 1 ; CORTEX-R4F: .eabi_attribute 9, 2 ; CORTEX-R4F: .fpu vfpv3-d16 +; CORTEX-R4F-NOT: .eabi_attribute 27, 1 +; CORTEX-R4F-NOT: .eabi_attribute 36 +; CORTEX-R4F-NOT: .eabi_attribute 42 +; CORTEX-R4F-NOT: .eabi_attribute 44 +; CORTEX-R4F-NOT: .eabi_attribute 68 ; CORTEX-R4F-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R4F: .eabi_attribute 20, 1 @@ -1215,13 +1220,8 @@ ; CORTEX-R4F: .eabi_attribute 23, 3 ; CORTEX-R4F: .eabi_attribute 24, 1 ; CORTEX-R4F: .eabi_attribute 25, 1 -; CORTEX-R4F-NOT: .eabi_attribute 27, 1 ; CORTEX-R4F-NOT: .eabi_attribute 28 -; CORTEX-R4F-NOT: .eabi_attribute 36 ; CORTEX-R4F: .eabi_attribute 38, 1 -; CORTEX-R4F-NOT: .eabi_attribute 42 -; CORTEX-R4F-NOT: .eabi_attribute 44 -; CORTEX-R4F-NOT: .eabi_attribute 68 ; CORTEX-R5: .cpu cortex-r5 ; CORTEX-R5: .eabi_attribute 6, 10 @@ -1229,6 +1229,11 @@ ; CORTEX-R5: .eabi_attribute 8, 1 ; CORTEX-R5: .eabi_attribute 9, 2 ; CORTEX-R5: .fpu vfpv3-d16 +; CORTEX-R5-NOT: .eabi_attribute 27, 1 +; CORTEX-R5-NOT: .eabi_attribute 36 +; CORTEX-R5: .eabi_attribute 44, 2 +; CORTEX-R5-NOT: .eabi_attribute 42 +; CORTEX-R5-NOT: .eabi_attribute 68 ; CORTEX-R5-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R5: .eabi_attribute 20, 1 @@ -1237,13 +1242,8 @@ ; CORTEX-R5: .eabi_attribute 23, 3 ; CORTEX-R5: .eabi_attribute 24, 1 ; CORTEX-R5: .eabi_attribute 25, 1 -; CORTEX-R5-NOT: .eabi_attribute 27, 1 ; CORTEX-R5-NOT: .eabi_attribute 28 -; CORTEX-R5-NOT: .eabi_attribute 36 ; CORTEX-R5: .eabi_attribute 38, 1 -; CORTEX-R5-NOT: .eabi_attribute 42 -; CORTEX-R5: .eabi_attribute 44, 2 -; CORTEX-R5-NOT: .eabi_attribute 68 ; CORTEX-R5-FAST-NOT: .eabi_attribute 19 ;; The R5 has the VFPv3 FP unit, which always flushes preserving sign. @@ -1258,6 +1258,10 @@ ; CORTEX-R7: .eabi_attribute 8, 1 ; CORTEX-R7: .eabi_attribute 9, 2 ; CORTEX-R7: .fpu vfpv3-d16-fp16 +; CORTEX-R7: .eabi_attribute 36, 1 +; CORTEX-R7: .eabi_attribute 42, 1 +; CORTEX-R7: .eabi_attribute 44, 2 +; CORTEX-R7-NOT: .eabi_attribute 68 ; CORTEX-R7-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R7: .eabi_attribute 20, 1 @@ -1267,11 +1271,7 @@ ; CORTEX-R7: .eabi_attribute 24, 1 ; CORTEX-R7: .eabi_attribute 25, 1 ; CORTEX-R7-NOT: .eabi_attribute 28 -; CORTEX-R7: .eabi_attribute 36, 1 ; CORTEX-R7: .eabi_attribute 38, 1 -; CORTEX-R7: .eabi_attribute 42, 1 -; CORTEX-R7: .eabi_attribute 44, 2 -; CORTEX-R7-NOT: .eabi_attribute 68 ; CORTEX-R7-FAST-NOT: .eabi_attribute 19 ;; The R7 has the VFPv3 FP unit, which always flushes preserving sign. @@ -1286,6 +1286,10 @@ ; CORTEX-R8: .eabi_attribute 8, 1 ; CORTEX-R8: .eabi_attribute 9, 2 ; CORTEX-R8: .fpu vfpv3-d16-fp16 +; CORTEX-R8: .eabi_attribute 36, 1 +; CORTEX-R8: .eabi_attribute 42, 1 +; CORTEX-R8: .eabi_attribute 44, 2 +; CORTEX-R8-NOT: .eabi_attribute 68 ; CORTEX-R8-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R8: .eabi_attribute 20, 1 @@ -1295,11 +1299,7 @@ ; CORTEX-R8: .eabi_attribute 24, 1 ; CORTEX-R8: .eabi_attribute 25, 1 ; CORTEX-R8-NOT: .eabi_attribute 28 -; CORTEX-R8: .eabi_attribute 36, 1 ; CORTEX-R8: .eabi_attribute 38, 1 -; CORTEX-R8: .eabi_attribute 42, 1 -; CORTEX-R8: .eabi_attribute 44, 2 -; CORTEX-R8-NOT: .eabi_attribute 68 ; CORTEX-R8-FAST-NOT: .eabi_attribute 19 ;; The R8 has the VFPv3 FP unit, which always flushes preserving sign. @@ -1315,6 +1315,11 @@ ; CORTEX-A32: .eabi_attribute 9, 2 ; CORTEX-A32: .fpu crypto-neon-fp-armv8 ; CORTEX-A32: .eabi_attribute 12, 3 +; CORTEX-A32-NOT: .eabi_attribute 27 +; CORTEX-A32: .eabi_attribute 36, 1 +; CORTEX-A32: .eabi_attribute 42, 1 +; CORTEX-A32-NOT: .eabi_attribute 44 +; CORTEX-A32: .eabi_attribute 68, 3 ; CORTEX-A32-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A32: .eabi_attribute 20, 1 @@ -1323,13 +1328,8 @@ ; CORTEX-A32: .eabi_attribute 23, 3 ; CORTEX-A32: .eabi_attribute 24, 1 ; CORTEX-A32: .eabi_attribute 25, 1 -; CORTEX-A32-NOT: .eabi_attribute 27 ; CORTEX-A32-NOT: .eabi_attribute 28 -; CORTEX-A32: .eabi_attribute 36, 1 ; CORTEX-A32: .eabi_attribute 38, 1 -; CORTEX-A32: .eabi_attribute 42, 1 -; CORTEX-A32-NOT: .eabi_attribute 44 -; CORTEX-A32: .eabi_attribute 68, 3 ; CORTEX-A32-FAST-NOT: .eabi_attribute 19 ;; The A32 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1343,20 +1343,20 @@ ; CORTEX-M23: .eabi_attribute 7, 77 ; CORTEX-M23: .eabi_attribute 8, 0 ; CORTEX-M23: .eabi_attribute 9, 3 +; CORTEX-M23-NOT: .eabi_attribute 27 +; CORTEX-M23: .eabi_attribute 34, 1 +; CORTEX-M23-NOT: .eabi_attribute 44 ; CORTEX-M23: .eabi_attribute 17, 1 ;; We default to IEEE 754 compliance ; CORTEX-M23-NOT: .eabi_attribute 19 ; CORTEX-M23: .eabi_attribute 20, 1 ; CORTEX-M23: .eabi_attribute 21, 1 ; CORTEX-M23: .eabi_attribute 23, 3 -; CORTEX-M23: .eabi_attribute 34, 1 ; CORTEX-M23: .eabi_attribute 24, 1 -; CORTEX-M23-NOT: .eabi_attribute 27 ; CORTEX-M23-NOT: .eabi_attribute 28 ; CORTEX-M23: .eabi_attribute 25, 1 ; CORTEX-M23: .eabi_attribute 38, 1 ; CORTEX-M23: .eabi_attribute 14, 0 -; CORTEX-M23-NOT: .eabi_attribute 44 ; CORTEX-M33: .cpu cortex-m33 ; CORTEX-M33: .eabi_attribute 6, 17 @@ -1364,21 +1364,21 @@ ; CORTEX-M33: .eabi_attribute 8, 0 ; CORTEX-M33: .eabi_attribute 9, 3 ; CORTEX-M33: .fpu fpv5-sp-d16 +; CORTEX-M33: .eabi_attribute 27, 1 +; CORTEX-M33: .eabi_attribute 36, 1 +; CORTEX-M33-NOT: .eabi_attribute 44 +; CORTEX-M33: .eabi_attribute 46, 1 +; CORTEX-M33: .eabi_attribute 34, 1 ; CORTEX-M33: .eabi_attribute 17, 1 ;; We default to IEEE 754 compliance ; CORTEX-M23-NOT: .eabi_attribute 19 ; CORTEX-M33: .eabi_attribute 20, 1 ; CORTEX-M33: .eabi_attribute 21, 1 ; CORTEX-M33: .eabi_attribute 23, 3 -; CORTEX-M33: .eabi_attribute 34, 1 ; CORTEX-M33: .eabi_attribute 24, 1 ; CORTEX-M33: .eabi_attribute 25, 1 -; CORTEX-M33: .eabi_attribute 27, 1 ; CORTEX-M33-NOT: .eabi_attribute 28 -; CORTEX-M33: .eabi_attribute 36, 1 ; CORTEX-M33: .eabi_attribute 38, 1 -; CORTEX-M33: .eabi_attribute 46, 1 -; CORTEX-M33-NOT: .eabi_attribute 44 ; CORTEX-M33: .eabi_attribute 14, 0 ; CORTEX-M33-FAST-NOT: .eabi_attribute 19 @@ -1394,6 +1394,11 @@ ; CORTEX-A35: .eabi_attribute 9, 2 ; CORTEX-A35: .fpu crypto-neon-fp-armv8 ; CORTEX-A35: .eabi_attribute 12, 3 +; CORTEX-A35-NOT: .eabi_attribute 27 +; CORTEX-A35: .eabi_attribute 36, 1 +; CORTEX-A35: .eabi_attribute 42, 1 +; CORTEX-A35-NOT: .eabi_attribute 44 +; CORTEX-A35: .eabi_attribute 68, 3 ; CORTEX-A35-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A35: .eabi_attribute 20, 1 @@ -1402,13 +1407,8 @@ ; CORTEX-A35: .eabi_attribute 23, 3 ; CORTEX-A35: .eabi_attribute 24, 1 ; CORTEX-A35: .eabi_attribute 25, 1 -; CORTEX-A35-NOT: .eabi_attribute 27 ; CORTEX-A35-NOT: .eabi_attribute 28 -; CORTEX-A35: .eabi_attribute 36, 1 ; CORTEX-A35: .eabi_attribute 38, 1 -; CORTEX-A35: .eabi_attribute 42, 1 -; CORTEX-A35-NOT: .eabi_attribute 44 -; CORTEX-A35: .eabi_attribute 68, 3 ; CORTEX-A35-FAST-NOT: .eabi_attribute 19 ;; The A35 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1424,6 +1424,11 @@ ; CORTEX-A53: .eabi_attribute 9, 2 ; CORTEX-A53: .fpu crypto-neon-fp-armv8 ; CORTEX-A53: .eabi_attribute 12, 3 +; CORTEX-A53-NOT: .eabi_attribute 27 +; CORTEX-A53: .eabi_attribute 36, 1 +; CORTEX-A53: .eabi_attribute 42, 1 +; CORTEX-A53-NOT: .eabi_attribute 44 +; CORTEX-A53: .eabi_attribute 68, 3 ; CORTEX-A53-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A53: .eabi_attribute 20, 1 @@ -1432,13 +1437,8 @@ ; CORTEX-A53: .eabi_attribute 23, 3 ; CORTEX-A53: .eabi_attribute 24, 1 ; CORTEX-A53: .eabi_attribute 25, 1 -; CORTEX-A53-NOT: .eabi_attribute 27 ; CORTEX-A53-NOT: .eabi_attribute 28 -; CORTEX-A53: .eabi_attribute 36, 1 ; CORTEX-A53: .eabi_attribute 38, 1 -; CORTEX-A53: .eabi_attribute 42, 1 -; CORTEX-A53-NOT: .eabi_attribute 44 -; CORTEX-A53: .eabi_attribute 68, 3 ; CORTEX-A53-FAST-NOT: .eabi_attribute 19 ;; The A53 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1454,6 +1454,11 @@ ; CORTEX-A57: .eabi_attribute 9, 2 ; CORTEX-A57: .fpu crypto-neon-fp-armv8 ; CORTEX-A57: .eabi_attribute 12, 3 +; CORTEX-A57-NOT: .eabi_attribute 27 +; CORTEX-A57: .eabi_attribute 36, 1 +; CORTEX-A57: .eabi_attribute 42, 1 +; CORTEX-A57-NOT: .eabi_attribute 44 +; CORTEX-A57: .eabi_attribute 68, 3 ; CORTEX-A57-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A57: .eabi_attribute 20, 1 @@ -1462,13 +1467,8 @@ ; CORTEX-A57: .eabi_attribute 23, 3 ; CORTEX-A57: .eabi_attribute 24, 1 ; CORTEX-A57: .eabi_attribute 25, 1 -; CORTEX-A57-NOT: .eabi_attribute 27 ; CORTEX-A57-NOT: .eabi_attribute 28 -; CORTEX-A57: .eabi_attribute 36, 1 ; CORTEX-A57: .eabi_attribute 38, 1 -; CORTEX-A57: .eabi_attribute 42, 1 -; CORTEX-A57-NOT: .eabi_attribute 44 -; CORTEX-A57: .eabi_attribute 68, 3 ; CORTEX-A57-FAST-NOT: .eabi_attribute 19 ;; The A57 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1484,6 +1484,11 @@ ; CORTEX-A72: .eabi_attribute 9, 2 ; CORTEX-A72: .fpu crypto-neon-fp-armv8 ; CORTEX-A72: .eabi_attribute 12, 3 +; CORTEX-A72-NOT: .eabi_attribute 27 +; CORTEX-A72: .eabi_attribute 36, 1 +; CORTEX-A72: .eabi_attribute 42, 1 +; CORTEX-A72-NOT: .eabi_attribute 44 +; CORTEX-A72: .eabi_attribute 68, 3 ; CORTEX-A72-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A72: .eabi_attribute 20, 1 @@ -1492,13 +1497,8 @@ ; CORTEX-A72: .eabi_attribute 23, 3 ; CORTEX-A72: .eabi_attribute 24, 1 ; CORTEX-A72: .eabi_attribute 25, 1 -; CORTEX-A72-NOT: .eabi_attribute 27 ; CORTEX-A72-NOT: .eabi_attribute 28 -; CORTEX-A72: .eabi_attribute 36, 1 ; CORTEX-A72: .eabi_attribute 38, 1 -; CORTEX-A72: .eabi_attribute 42, 1 -; CORTEX-A72-NOT: .eabi_attribute 44 -; CORTEX-A72: .eabi_attribute 68, 3 ; CORTEX-A72-FAST-NOT: .eabi_attribute 19 ;; The A72 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1514,6 +1514,11 @@ ; CORTEX-A73: .eabi_attribute 9, 2 ; CORTEX-A73: .fpu crypto-neon-fp-armv8 ; CORTEX-A73: .eabi_attribute 12, 3 +; CORTEX-A73-NOT: .eabi_attribute 27 +; CORTEX-A73: .eabi_attribute 36, 1 +; CORTEX-A73: .eabi_attribute 42, 1 +; CORTEX-A73-NOT: .eabi_attribute 44 +; CORTEX-A73: .eabi_attribute 68, 3 ; CORTEX-A73-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A73: .eabi_attribute 20, 1 @@ -1522,14 +1527,9 @@ ; CORTEX-A73: .eabi_attribute 23, 3 ; CORTEX-A73: .eabi_attribute 24, 1 ; CORTEX-A73: .eabi_attribute 25, 1 -; CORTEX-A73-NOT: .eabi_attribute 27 ; CORTEX-A73-NOT: .eabi_attribute 28 -; CORTEX-A73: .eabi_attribute 36, 1 ; CORTEX-A73: .eabi_attribute 38, 1 -; CORTEX-A73: .eabi_attribute 42, 1 -; CORTEX-A73-NOT: .eabi_attribute 44 ; CORTEX-A73: .eabi_attribute 14, 0 -; CORTEX-A73: .eabi_attribute 68, 3 ; EXYNOS-M1: .cpu exynos-m1 ; EXYNOS-M1: .eabi_attribute 6, 14 @@ -1538,6 +1538,11 @@ ; EXYNOS-M1: .eabi_attribute 9, 2 ; EXYNOS-M1: .fpu crypto-neon-fp-armv8 ; EXYNOS-M1: .eabi_attribute 12, 3 +; EXYNOS-M1-NOT: .eabi_attribute 27 +; EXYNOS-M1: .eabi_attribute 36, 1 +; EXYNOS-M1: .eabi_attribute 42, 1 +; EXYNOS-M1-NOT: .eabi_attribute 44 +; EXYNOS-M1: .eabi_attribute 68, 3 ; EXYNOS-M1-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; EXYNOS-M1: .eabi_attribute 20, 1 @@ -1546,13 +1551,8 @@ ; EXYNOS-M1: .eabi_attribute 23, 3 ; EXYNOS-M1: .eabi_attribute 24, 1 ; EXYNOS-M1: .eabi_attribute 25, 1 -; EXYNOS-M1-NOT: .eabi_attribute 27 ; EXYNOS-M1-NOT: .eabi_attribute 28 -; EXYNOS-M1: .eabi_attribute 36, 1 ; EXYNOS-M1: .eabi_attribute 38, 1 -; EXYNOS-M1: .eabi_attribute 42, 1 -; EXYNOS-M1-NOT: .eabi_attribute 44 -; EXYNOS-M1: .eabi_attribute 68, 3 ; EXYNOS-M1-FAST-NOT: .eabi_attribute 19 ;; The exynos-m1 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1568,6 +1568,11 @@ ; EXYNOS-M2: .eabi_attribute 9, 2 ; EXYNOS-M2: .fpu crypto-neon-fp-armv8 ; EXYNOS-M2: .eabi_attribute 12, 3 +; EXYNOS-M2-NOT: .eabi_attribute 27 +; EXYNOS-M2: .eabi_attribute 36, 1 +; EXYNOS-M2: .eabi_attribute 42, 1 +; EXYNOS-M2-NOT: .eabi_attribute 44 +; EXYNOS-M2: .eabi_attribute 68, 3 ; EXYNOS-M2-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; EXYNOS-M2: .eabi_attribute 20, 1 @@ -1576,13 +1581,8 @@ ; EXYNOS-M2: .eabi_attribute 23, 3 ; EXYNOS-M2: .eabi_attribute 24, 1 ; EXYNOS-M2: .eabi_attribute 25, 1 -; EXYNOS-M2-NOT: .eabi_attribute 27 ; EXYNOS-M2-NOT: .eabi_attribute 28 -; EXYNOS-M2: .eabi_attribute 36, 1 ; EXYNOS-M2: .eabi_attribute 38, 1 -; EXYNOS-M2: .eabi_attribute 42, 1 -; EXYNOS-M2-NOT: .eabi_attribute 44 -; EXYNOS-M2: .eabi_attribute 68, 3 ; EXYNOS-M3: .cpu exynos-m3 ; EXYNOS-M3: .eabi_attribute 6, 14 @@ -1591,6 +1591,11 @@ ; EXYNOS-M3: .eabi_attribute 9, 2 ; EXYNOS-M3: .fpu crypto-neon-fp-armv8 ; EXYNOS-M3: .eabi_attribute 12, 3 +; EXYNOS-M3-NOT: .eabi_attribute 27 +; EXYNOS-M3: .eabi_attribute 36, 1 +; EXYNOS-M3: .eabi_attribute 42, 1 +; EXYNOS-M3-NOT: .eabi_attribute 44 +; EXYNOS-M3: .eabi_attribute 68, 3 ; EXYNOS-M3-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; EXYNOS-M3: .eabi_attribute 20, 1 @@ -1599,13 +1604,8 @@ ; EXYNOS-M3: .eabi_attribute 23, 3 ; EXYNOS-M3: .eabi_attribute 24, 1 ; EXYNOS-M3: .eabi_attribute 25, 1 -; EXYNOS-M3-NOT: .eabi_attribute 27 ; EXYNOS-M3-NOT: .eabi_attribute 28 -; EXYNOS-M3: .eabi_attribute 36, 1 ; EXYNOS-M3: .eabi_attribute 38, 1 -; EXYNOS-M3: .eabi_attribute 42, 1 -; EXYNOS-M3-NOT: .eabi_attribute 44 -; EXYNOS-M3: .eabi_attribute 68, 3 ; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16 ; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16 @@ -1619,6 +1619,11 @@ ; GENERIC-ARMV8_1-A: .eabi_attribute 9, 2 ; GENERIC-ARMV8_1-A: .fpu crypto-neon-fp-armv8 ; GENERIC-ARMV8_1-A: .eabi_attribute 12, 4 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27 +; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44 +; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3 ; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; GENERIC-ARMV8_1-A: .eabi_attribute 20, 1 @@ -1627,13 +1632,8 @@ ; GENERIC-ARMV8_1-A: .eabi_attribute 23, 3 ; GENERIC-ARMV8_1-A: .eabi_attribute 24, 1 ; GENERIC-ARMV8_1-A: .eabi_attribute 25, 1 -; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27 ; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28 -; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1 ; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1 -; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1 -; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44 -; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3 ; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19 ;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign. @@ -1670,23 +1670,16 @@ ; ARMv8R-SP-NOT: .eabi_attribute 12 ; ARMv8R-NEON: .fpu neon-fp-armv8 ; ARMv8R-NEON: .eabi_attribute 12, 3 @ Tag_Advanced_SIMD_arch -; ARMv8R: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use -; ARMv8R: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal -; ARMv8R: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions -; ARMv8R: .eabi_attribute 23, 3 @ Tag_ABI_FP_number_model -; ARMv8R: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access -; ARMv8R: .eabi_attribute 24, 1 @ Tag_ABI_align_needed -; ARMv8R: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved ; ARMv8R-NOFPU-NOT: .eabi_attribute 27 ; ARMv8R-SP: .eabi_attribute 27, 1 @ Tag_ABI_HardFP_use ; ARMv8R-NEON-NOT: .eabi_attribute 27 ; ARMv8R-NOFPU-NOT: .eabi_attribute 36 ; ARMv8R-SP: .eabi_attribute 36, 1 @ Tag_FP_HP_extension ; ARMv8R-NEON: .eabi_attribute 36, 1 @ Tag_FP_HP_extension -; ARMv8R: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format ; ARMv8R: .eabi_attribute 42, 1 @ Tag_MPextension_use -; ARMv8R: .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use ; ARMv8R: .eabi_attribute 68, 2 @ Tag_Virtualization_use +; ARMv8R: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format +; ARMv8R: .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use define i32 @f(i64 %z) { ret i32 0 diff --git a/test/CodeGen/ARM/darwin-tls-preserved.ll b/test/CodeGen/ARM/darwin-tls-preserved.ll new file mode 100644 index 0000000000000..4969fabfd9b3c --- /dev/null +++ b/test/CodeGen/ARM/darwin-tls-preserved.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -arm-atomic-cfg-tidy=0 -o - %s | FileCheck %s + +@tls_var = thread_local global i32 0 + +; r9 and r12 can be live across the asm, but those get clobbered by the TLS +; access (in a different BB to order it). +define i32 @test_regs_preserved(i32* %ptr1, i32* %ptr2, i1 %tst1) { +; CHECK-LABEL: test_regs_preserved: +; CHECK: str {{.*}}, [sp +; CHECK: mov {{.*}}, r12 +entry: + call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r10},~{r11},~{r13},~{lr}"() + br i1 %tst1, label %get_tls, label %done + +get_tls: + %val = load i32, i32* @tls_var + br label %done + +done: + %res = phi i32 [%val, %get_tls], [0, %entry] + store i32 42, i32* %ptr1 + store i32 42, i32* %ptr2 + ret i32 %res +} diff --git a/test/CodeGen/ARM/divmod-hwdiv.ll b/test/CodeGen/ARM/divmod-hwdiv.ll new file mode 100644 index 0000000000000..4cc316ffa3ea6 --- /dev/null +++ b/test/CodeGen/ARM/divmod-hwdiv.ll @@ -0,0 +1,37 @@ +; The hwdiv subtarget feature should only influence thumb, not arm. +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,THUMB-HWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV + +; The hwdiv-arm subtarget feature should only influence arm, not thumb. +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,ARM-HWDIV +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV + +define arm_aapcscc i32 @test_i32_srem(i32 %x, i32 %y) { +; ALL-LABEL: test_i32_srem: +; ARM-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 +; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 +; ARM-HWDIV: sub r0, r0, [[P]] +; THUMB-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 +; THUMB-HWDIV: mls r0, [[Q]], r1, r0 +; AEABI-NOHWDIV: bl __aeabi_idivmod +; AEABI-NOHWDIV: mov r0, r1 + %r = srem i32 %x, %y + ret i32 %r +} + +define arm_aapcscc i32 @test_i32_urem(i32 %x, i32 %y) { +; ALL-LABEL: test_i32_urem: +; ARM-HWDIV: udiv [[Q:r[0-9]+]], r0, r1 +; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 +; ARM-HWDIV: sub r0, r0, [[P]] +; THUMB-HWDIV: udiv [[Q:r[0-9]+]], r0, r1 +; THUMB-HWDIV: mls r0, [[Q]], r1, r0 +; AEABI-NOHWDIV: bl __aeabi_uidivmod +; AEABI-NOHWDIV: mov r0, r1 + %r = urem i32 %x, %y + ret i32 %r +} diff --git a/test/CodeGen/ARM/fpoffset_overflow.mir b/test/CodeGen/ARM/fpoffset_overflow.mir new file mode 100644 index 0000000000000..9c6cd931b1532 --- /dev/null +++ b/test/CodeGen/ARM/fpoffset_overflow.mir @@ -0,0 +1,94 @@ +# RUN: llc -o - %s -mtriple=thumbv7-- -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s +--- +# This should trigger an emergency spill in the register scavenger because the +# frame offset into the large argument is too large. +# CHECK-LABEL: name: func0 +# CHECK: t2STRi12 killed %r7, %sp, 0, 14, _ :: (store 4 into %stack.0) +# CHECK: %r7 = t2ADDri killed %sp, 4096, 14, _, _ +# CHECK: %r11 = t2LDRi12 killed %r7, 36, 14, _ :: (load 4) +# CHECK: %r7 = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0) +name: func0 +tracksRegLiveness: true +fixedStack: + - { id: 0, offset: 4084, size: 4, alignment: 4, isImmutable: true, + isAliased: false } + - { id: 1, offset: -12, size: 4096, alignment: 4, isImmutable: false, + isAliased: false } +body: | + bb.0: + %r0 = IMPLICIT_DEF + %r1 = IMPLICIT_DEF + %r2 = IMPLICIT_DEF + %r3 = IMPLICIT_DEF + %r4 = IMPLICIT_DEF + %r5 = IMPLICIT_DEF + %r6 = IMPLICIT_DEF + %r8 = IMPLICIT_DEF + %r9 = IMPLICIT_DEF + %r10 = IMPLICIT_DEF + %r11 = IMPLICIT_DEF + %r12 = IMPLICIT_DEF + %lr = IMPLICIT_DEF + + %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + + KILL %r0 + KILL %r1 + KILL %r2 + KILL %r3 + KILL %r4 + KILL %r5 + KILL %r6 + KILL %r8 + KILL %r9 + KILL %r10 + KILL %r11 + KILL %r12 + KILL %lr +... +--- +# This should not trigger an emergency spill yet. +# CHECK-LABEL: name: func1 +# CHECK-NOT: t2STRi12 +# CHECK-NOT: t2ADDri +# CHECK: %r11 = t2LDRi12 %sp, 4092, 14, _ :: (load 4) +# CHECK-NOT: t2LDRi12 +name: func1 +tracksRegLiveness: true +fixedStack: + - { id: 0, offset: 4044, size: 4, alignment: 4, isImmutable: true, + isAliased: false } + - { id: 1, offset: -12, size: 4056, alignment: 4, isImmutable: false, + isAliased: false } +body: | + bb.0: + %r0 = IMPLICIT_DEF + %r1 = IMPLICIT_DEF + %r2 = IMPLICIT_DEF + %r3 = IMPLICIT_DEF + %r4 = IMPLICIT_DEF + %r5 = IMPLICIT_DEF + %r6 = IMPLICIT_DEF + %r8 = IMPLICIT_DEF + %r9 = IMPLICIT_DEF + %r10 = IMPLICIT_DEF + %r11 = IMPLICIT_DEF + %r12 = IMPLICIT_DEF + %lr = IMPLICIT_DEF + + %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + + KILL %r0 + KILL %r1 + KILL %r2 + KILL %r3 + KILL %r4 + KILL %r5 + KILL %r6 + KILL %r8 + KILL %r9 + KILL %r10 + KILL %r11 + KILL %r12 + KILL %lr +... diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index d874884dcb393..fb204debf6127 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -30,10 +30,9 @@ entry: define void @t1(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] -; CHECK: adds r0, #15 -; CHECK: adds r1, #15 +; CHECK: movs [[INC:r[0-9]+]], #15 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1], [[INC]] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) @@ -43,13 +42,15 @@ entry: define void @t2(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t2: +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: add.w r3, r0, #16 +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: movw [[REG2:r[0-9]+]], #16716 ; CHECK: movt [[REG2:r[0-9]+]], #72 -; CHECK: str [[REG2]], [r0, #32] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! +; CHECK: str [[REG2]], [r0] ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) ret void } diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll index f6f8d5623509e..b86874692acad 100644 --- a/test/CodeGen/ARM/memset-inline.ll +++ b/test/CodeGen/ARM/memset-inline.ll @@ -13,10 +13,10 @@ entry: define void @t2() nounwind ssp { entry: ; CHECK-LABEL: t2: -; CHECK: add.w r1, r0, #10 ; CHECK: vmov.i32 {{q[0-9]+}}, #0x0 -; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: movs r1, #10 +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1 +; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll index db9bc6ccdd0c8..0a7f7698fa88c 100644 --- a/test/CodeGen/ARM/vbits.ll +++ b/test/CodeGen/ARM/vbits.ll @@ -1,8 +1,14 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 | FileCheck %s define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_andi8: -;CHECK: vand +; CHECK-LABEL: v_andi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = and <8 x i8> %tmp1, %tmp2 @@ -10,8 +16,13 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_andi16: -;CHECK: vand +; CHECK-LABEL: v_andi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = and <4 x i16> %tmp1, %tmp2 @@ -19,8 +30,13 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_andi32: -;CHECK: vand +; CHECK-LABEL: v_andi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = and <2 x i32> %tmp1, %tmp2 @@ -28,8 +44,13 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_andi64: -;CHECK: vand +; CHECK-LABEL: v_andi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = and <1 x i64> %tmp1, %tmp2 @@ -37,8 +58,14 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_andQi8: -;CHECK: vand +; CHECK-LABEL: v_andQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = and <16 x i8> %tmp1, %tmp2 @@ -46,8 +73,14 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_andQi16: -;CHECK: vand +; CHECK-LABEL: v_andQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = and <8 x i16> %tmp1, %tmp2 @@ -55,8 +88,14 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_andQi32: -;CHECK: vand +; CHECK-LABEL: v_andQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = and <4 x i32> %tmp1, %tmp2 @@ -64,8 +103,14 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_andQi64: -;CHECK: vand +; CHECK-LABEL: v_andQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = and <2 x i64> %tmp1, %tmp2 @@ -73,8 +118,13 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_bici8: -;CHECK: vbic +; CHECK-LABEL: v_bici8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -83,8 +133,13 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_bici16: -;CHECK: vbic +; CHECK-LABEL: v_bici16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > @@ -93,8 +148,13 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_bici32: -;CHECK: vbic +; CHECK-LABEL: v_bici32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > @@ -103,8 +163,13 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_bici64: -;CHECK: vbic +; CHECK-LABEL: v_bici64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > @@ -113,8 +178,14 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_bicQi8: -;CHECK: vbic +; CHECK-LABEL: v_bicQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -123,8 +194,14 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_bicQi16: -;CHECK: vbic +; CHECK-LABEL: v_bicQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -133,8 +210,14 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_bicQi32: -;CHECK: vbic +; CHECK-LABEL: v_bicQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > @@ -143,8 +226,14 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_bicQi64: -;CHECK: vbic +; CHECK-LABEL: v_bicQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > @@ -153,8 +242,13 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_eori8: -;CHECK: veor +; CHECK-LABEL: v_eori8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp1, %tmp2 @@ -162,8 +256,13 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_eori16: -;CHECK: veor +; CHECK-LABEL: v_eori16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp1, %tmp2 @@ -171,8 +270,13 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_eori32: -;CHECK: veor +; CHECK-LABEL: v_eori32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp1, %tmp2 @@ -180,8 +284,13 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_eori64: -;CHECK: veor +; CHECK-LABEL: v_eori64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp1, %tmp2 @@ -189,8 +298,14 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_eorQi8: -;CHECK: veor +; CHECK-LABEL: v_eorQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp1, %tmp2 @@ -198,8 +313,14 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_eorQi16: -;CHECK: veor +; CHECK-LABEL: v_eorQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp1, %tmp2 @@ -207,8 +328,14 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_eorQi32: -;CHECK: veor +; CHECK-LABEL: v_eorQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp1, %tmp2 @@ -216,8 +343,14 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_eorQi64: -;CHECK: veor +; CHECK-LABEL: v_eorQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp1, %tmp2 @@ -225,72 +358,113 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { -;CHECK-LABEL: v_mvni8: -;CHECK: vmvn +; CHECK-LABEL: v_mvni8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <8 x i8> %tmp2 } define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { -;CHECK-LABEL: v_mvni16: -;CHECK: vmvn +; CHECK-LABEL: v_mvni16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > ret <4 x i16> %tmp2 } define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { -;CHECK-LABEL: v_mvni32: -;CHECK: vmvn +; CHECK-LABEL: v_mvni32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > ret <2 x i32> %tmp2 } define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { -;CHECK-LABEL: v_mvni64: -;CHECK: vmvn +; CHECK-LABEL: v_mvni64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = xor <1 x i64> %tmp1, < i64 -1 > ret <1 x i64> %tmp2 } define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { -;CHECK-LABEL: v_mvnQi8: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <16 x i8> %tmp2 } define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { -;CHECK-LABEL: v_mvnQi16: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > ret <8 x i16> %tmp2 } define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { -;CHECK-LABEL: v_mvnQi32: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > ret <4 x i32> %tmp2 } define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { -;CHECK-LABEL: v_mvnQi64: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > ret <2 x i64> %tmp2 } define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_orri8: -;CHECK: vorr +; CHECK-LABEL: v_orri8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = or <8 x i8> %tmp1, %tmp2 @@ -298,8 +472,13 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_orri16: -;CHECK: vorr +; CHECK-LABEL: v_orri16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = or <4 x i16> %tmp1, %tmp2 @@ -307,8 +486,13 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_orri32: -;CHECK: vorr +; CHECK-LABEL: v_orri32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = or <2 x i32> %tmp1, %tmp2 @@ -316,8 +500,13 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_orri64: -;CHECK: vorr +; CHECK-LABEL: v_orri64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = or <1 x i64> %tmp1, %tmp2 @@ -325,8 +514,14 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_orrQi8: -;CHECK: vorr +; CHECK-LABEL: v_orrQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = or <16 x i8> %tmp1, %tmp2 @@ -334,8 +529,14 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_orrQi16: -;CHECK: vorr +; CHECK-LABEL: v_orrQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = or <8 x i16> %tmp1, %tmp2 @@ -343,8 +544,14 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_orrQi32: -;CHECK: vorr +; CHECK-LABEL: v_orrQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = or <4 x i32> %tmp1, %tmp2 @@ -352,8 +559,14 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_orrQi64: -;CHECK: vorr +; CHECK-LABEL: v_orrQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = or <2 x i64> %tmp1, %tmp2 @@ -361,8 +574,13 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_orni8: -;CHECK: vorn +; CHECK-LABEL: v_orni8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -371,8 +589,13 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_orni16: -;CHECK: vorn +; CHECK-LABEL: v_orni16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > @@ -381,8 +604,13 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_orni32: -;CHECK: vorn +; CHECK-LABEL: v_orni32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > @@ -391,8 +619,13 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_orni64: -;CHECK: vorn +; CHECK-LABEL: v_orni64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > @@ -401,8 +634,14 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_ornQi8: -;CHECK: vorn +; CHECK-LABEL: v_ornQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -411,8 +650,14 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_ornQi16: -;CHECK: vorn +; CHECK-LABEL: v_ornQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -421,8 +666,14 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_ornQi32: -;CHECK: vorn +; CHECK-LABEL: v_ornQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > @@ -431,8 +682,14 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_ornQi64: -;CHECK: vorn +; CHECK-LABEL: v_ornQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > @@ -441,8 +698,13 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vtsti8: -;CHECK: vtst.8 +; CHECK-LABEL: vtsti8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtst.8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = and <8 x i8> %tmp1, %tmp2 @@ -452,8 +714,13 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vtsti16: -;CHECK: vtst.16 +; CHECK-LABEL: vtsti16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtst.16 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = and <4 x i16> %tmp1, %tmp2 @@ -463,8 +730,13 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vtsti32: -;CHECK: vtst.32 +; CHECK-LABEL: vtsti32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtst.32 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = and <2 x i32> %tmp1, %tmp2 @@ -474,8 +746,14 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: vtstQi8: -;CHECK: vtst.8 +; CHECK-LABEL: vtstQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtst.8 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = and <16 x i8> %tmp1, %tmp2 @@ -485,8 +763,14 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vtstQi16: -;CHECK: vtst.16 +; CHECK-LABEL: vtstQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtst.16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = and <8 x i16> %tmp1, %tmp2 @@ -496,8 +780,14 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vtstQi32: -;CHECK: vtst.32 +; CHECK-LABEL: vtstQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtst.32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = and <4 x i32> %tmp1, %tmp2 @@ -508,19 +798,24 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_orrimm: -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vorr +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vorr.i32 d16, #0x1000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1> ret <8 x i8> %tmp3 } define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { -; CHECK: v_orrimmQ -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vorr +; CHECK-LABEL: v_orrimmQ: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vorr.i32 q8, #0x1000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1> ret <16 x i8> %tmp3 @@ -528,9 +823,11 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_bicimm: -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vbic +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vbic.i32 d16, #0xff000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > ret <8 x i8> %tmp3 @@ -538,10 +835,29 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { ; CHECK-LABEL: v_bicimmQ: -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vbic +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vbic.i32 q8, #0xff000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > ret <16 x i8> %tmp3 } + +define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind { +; CHECK-LABEL: hidden_not_v4i32: +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: vmov.i32 q8, #0x6 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbic q8, q8, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr + %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15> + %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6> + ret <4 x i32> %and +} + diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll index ed734723a86d3..4f7ebc938d4c7 100644 --- a/test/CodeGen/ARM/vector-load.ll +++ b/test/CodeGen/ARM/vector-load.ll @@ -253,11 +253,22 @@ define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) { } ; CHECK-LABEL: test_silly_load: -; CHECK: ldr {{r[0-9]+}}, [r0, #24] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]! -; CHECK: vldr d{{[0-9]+}}, [r0] +; CHECK: vldr d{{[0-9]+}}, [r0, #16] +; CHECK: movs r1, #24 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1 +; CHECK: ldr {{r[0-9]+}}, [r0] define void @test_silly_load(<28 x i8>* %addr) { load volatile <28 x i8>, <28 x i8>* %addr ret void } + +define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vld1_immoffset: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2 + ret <4 x i32>* %next +} diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll index 161bbf1d0fde8..e8c1a78a9113b 100644 --- a/test/CodeGen/ARM/vector-store.ll +++ b/test/CodeGen/ARM/vector-store.ll @@ -256,3 +256,13 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) store <4 x i8>* %inc, <4 x i8>** %ptr ret void } + +define <4 x i32>* @test_vst1_1reg(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vst1_1reg: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.out, i32 2 + ret <4 x i32>* %next +} diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index c6d5747f35093..71ca0f7915242 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -310,6 +310,23 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { ret <4 x i16> %tmp5 } +define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind { +;CHECK-LABEL: vld2dupi16_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #6 +;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]] + %A = load i16*, i16** %ptr + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + %tmp6 = getelementptr i16, i16* %A, i32 3 + store i16* %tmp6, i16** %ptr + ret <4 x i16> %tmp5 +} + define <2 x i32> @vld2dupi32(i8* %A) nounwind { ;CHECK-LABEL: vld2dupi32: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 2c14bc2d8f4eb..866641f3fbbd9 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -150,6 +150,22 @@ define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind { ret <2 x i32> %tmp5 } +define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vld2lanei32_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #12 +;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]] + %A = load i32*, i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32, i32* %A, i32 3 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 +} + define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld2lanef: ;CHECK: vld2.32 diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll index e4dd572a41b4d..2e0718877e96d 100644 --- a/test/CodeGen/ARM/vtbl.ll +++ b/test/CodeGen/ARM/vtbl.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - -verify-machineinstrs | FileCheck %s %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } |