diff options
Diffstat (limited to 'test/CodeGen/ARM')
64 files changed, 639 insertions, 449 deletions
diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll index ff3b7e16188e..fefe16747f10 100644 --- a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll +++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll @@ -24,7 +24,7 @@ entry:  ; CHECK-LABEL: caller:  define void @caller() { -; CHECK:      ldm     r0, {r1, r2, r3} +; CHECK:      ldm     r{{[0-9]+}}, {r1, r2, r3}    call void @t(i32 0, %struct.s* @v);    ret void  } diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 16642d85d9cf..6a1da0dfe85f 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -42,6 +42,9 @@    define void @test_constant_imm() { ret void }    define void @test_constant_cimm() { ret void } +  define void @test_select_s32() { ret void } +  define void @test_select_ptr() { ret void } +    define void @test_soft_fp_double() #0 { ret void }    attributes #0 = { "target-features"="+vfp2,-neonfp" } @@ -1100,6 +1103,76 @@ body:             |      BX_RET 14, _, implicit %r0  ...  --- +name:            test_select_s32 +# CHECK-LABEL: name: test_select_s32 +legalized:       true +regBankSelected: true +selected:        false +# CHECK: selected: true +registers: +  - { id: 0, class: gprb } +  - { id: 1, class: gprb } +  - { id: 2, class: gprb } +  - { id: 3, class: gprb } +body:             | +  bb.0: +    liveins: %r0, %r1, %r2 + +    %0(s32) = COPY %r0 +    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + +    %1(s32) = COPY %r1 +    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + +    %2(s1) = COPY %r2 +    ; CHECK: [[VREGC:%[0-9]+]] = COPY %r2 + +    %3(s32) = G_SELECT %2(s1),  %0, %1 +    ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr +    ; CHECK: [[RES:%[0-9]+]] = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr + +    %r0 = COPY %3(s32) +    ; CHECK: %r0 = COPY [[RES]] + +    BX_RET 14, _, implicit %r0 +    ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name:            test_select_ptr +# CHECK-LABEL: name: test_select_ptr +legalized:       true +regBankSelected: true +selected:        false +# CHECK: selected: true +registers: +  - { id: 0, class: gprb } +  - { id: 1, class: gprb } +  - { id: 2, class: gprb } +  - { id: 3, class: gprb } +body:             | +  bb.0: +    liveins: %r0, %r1, %r2 + +    %0(p0) = COPY %r0 +    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + +    %1(p0) = COPY %r1 +    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + +    %2(s1) = COPY %r2 +    ; CHECK: [[VREGC:%[0-9]+]] = COPY %r2 + +    %3(p0) = G_SELECT %2(s1),  %0, %1 +    ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr +    ; CHECK: [[RES:%[0-9]+]] = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr + +    %r0 = COPY %3(p0) +    ; CHECK: %r0 = COPY [[RES]] + +    BX_RET 14, _, implicit %r0 +    ; CHECK: BX_RET 14, _, implicit %r0 +... +---  name:            test_soft_fp_double  # CHECK-LABEL: name: test_soft_fp_double  legalized:       true diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index 0ff8d52e94c6..f50916e4b474 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -910,7 +910,7 @@ define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) {  define i32 @test_shufflevector_s32_v2s32(i32 %arg) {  ; CHECK-LABEL: name: test_shufflevector_s32_v2s32  ; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF +; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF  ; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0  ; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32)  ; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) @@ -925,7 +925,7 @@ define i32 @test_shufflevector_v2s32_v3s32(i32 %arg1, i32 %arg2) {  ; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32  ; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0  ; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF +; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF  ; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0  ; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1  ; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32) @@ -945,7 +945,7 @@ define i32 @test_shufflevector_v2s32_v4s32(i32 %arg1, i32 %arg2) {  ; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32  ; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0  ; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF +; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF  ; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0  ; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1  ; CHECK-DAG: [[MASK:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32), [[C0]](s32), [[C0]](s32) @@ -966,7 +966,7 @@ define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32  ; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1  ; CHECK: [[ARG3:%[0-9]+]](s32) = COPY %r2  ; CHECK: [[ARG4:%[0-9]+]](s32) = COPY %r3 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = IMPLICIT_DEF +; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = G_IMPLICIT_DEF  ; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0  ; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1  ; CHECK-DAG: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 @@ -1009,7 +1009,7 @@ define i32 @test_constantstruct_v2s32_s32_s32() {  ; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32)  ; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3  ; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[C5:%[0-9]+]](s128) = IMPLICIT_DEF +; CHECK: [[C5:%[0-9]+]](s128) = G_IMPLICIT_DEF  ; CHECK: [[C6:%[0-9]+]](s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0  ; CHECK: [[C7:%[0-9]+]](s128) = G_INSERT [[C6]], [[C3]](s32), 64  ; CHECK: [[C8:%[0-9]+]](s128) = G_INSERT [[C7]], [[C4]](s32), 96 diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 76fb39ecea01..4c498ff6ca9b 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -400,3 +400,23 @@ entry:    %r = zext i1 %v to i32    ret i32 %r  } + +define arm_aapcscc i32 @test_select_i32(i32 %a, i32 %b, i1 %cond) { +; CHECK-LABEL: test_select_i32 +; CHECK: cmp r2, #0 +; CHECK: moveq r0, r1 +; CHECK: bx lr +entry: +  %r = select i1 %cond, i32 %a, i32 %b +  ret i32 %r +} + +define arm_aapcscc i32* @test_select_ptr(i32* %a, i32* %b, i1 %cond) { +; CHECK-LABEL: test_select_ptr +; CHECK: cmp r2, #0 +; CHECK: moveq r0, r1 +; CHECK: bx lr +entry: +  %r = select i1 %cond, i32* %a, i32* %b +  ret i32* %r +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index 2def31eb1592..bf759728c365 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -39,6 +39,9 @@    define void @test_icmp_s16() { ret void }    define void @test_icmp_s32() { ret void } +  define void @test_select_s32() { ret void } +  define void @test_select_ptr() { ret void } +    define void @test_fadd_s32() #0 { ret void }    define void @test_fadd_s64() #0 { ret void } @@ -775,6 +778,58 @@ body:             |      BX_RET 14, _, implicit %r0  ...  --- +name:            test_select_s32 +# CHECK-LABEL: name: test_select_s32 +legalized:       false +# CHECK: legalized: true +regBankSelected: false +selected:        false +tracksRegLiveness: true +registers: +  - { id: 0, class: _ } +  - { id: 1, class: _ } +  - { id: 2, class: _ } +  - { id: 3, class: _ } +body:             | +  bb.0: +    liveins: %r0, %r1, %r2 + +    %0(s32) = COPY %r0 +    %1(s32) = COPY %r1 +    %2(s1) = COPY %r2 +    %3(s32) = G_SELECT %2(s1), %0, %1 +    ; G_SELECT with s32 is legal, so we should find it unchanged in the output +    ; CHECK: {{%[0-9]+}}(s32) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} +    %r0 = COPY %3(s32) +    BX_RET 14, _, implicit %r0 +... +--- +name:            test_select_ptr +# CHECK-LABEL: name: test_select_ptr +legalized:       false +# CHECK: legalized: true +regBankSelected: false +selected:        false +tracksRegLiveness: true +registers: +  - { id: 0, class: _ } +  - { id: 1, class: _ } +  - { id: 2, class: _ } +  - { id: 3, class: _ } +body:             | +  bb.0: +    liveins: %r0, %r1, %r2 + +    %0(p0) = COPY %r0 +    %1(p0) = COPY %r1 +    %2(s1) = COPY %r2 +    %3(p0) = G_SELECT %2(s1), %0, %1 +    ; G_SELECT with p0 is legal, so we should find it unchanged in the output +    ; CHECK: {{%[0-9]+}}(p0) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} +    %r0 = COPY %3(p0) +    BX_RET 14, _, implicit %r0 +... +---  name:            test_fadd_s32  # CHECK-LABEL: name: test_fadd_s32  legalized:       false diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index d97dd60bac22..d3b93e488ef4 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -36,6 +36,8 @@    define void @test_icmp_eq_s32() { ret void } +  define void @test_select_s32() { ret void } +    define void @test_fadd_s32() #0 { ret void }    define void @test_fadd_s64() #0 { ret void } @@ -741,6 +743,35 @@ body:             |  ...  --- +name:            test_select_s32 +# CHECK-LABEL: name: test_select_s32 +legalized:       true +regBankSelected: false +selected:        false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } + +registers: +  - { id: 0, class: _ } +  - { id: 1, class: _ } +  - { id: 2, class: _ } +  - { id: 3, class: _ } +body:             | +  bb.0: +    liveins: %r0, %r1, %r2 + +    %0(s32) = COPY %r0 +    %1(s32) = COPY %r1 +    %2(s1) = COPY %r2 +    %3(s32) = G_SELECT %2(s1), %0, %1 +    %r0 = COPY %3(s32) +    BX_RET 14, _, implicit %r0 + +... +---  name:            test_fadd_s32  # CHECK-LABEL: name: test_fadd_s32  legalized:       true diff --git a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll index d303e9da8604..a73a7cf8414f 100644 --- a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll +++ b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll @@ -19,9 +19,9 @@ entry:  ; CHECK-LABEL: isel  ; CHECK: push {r4, r5} -; CHECK: movw r4, #{{\d*}}  ; CHECK: movw r12, #0  ; CHECK: movt r12, #0 +; CHECK: movw r4, #{{\d*}}  ; CHECK: blx r12  ; CHECK: sub.w sp, sp, r4 diff --git a/test/CodeGen/ARM/Windows/no-arm-mode.ll b/test/CodeGen/ARM/Windows/no-arm-mode.ll deleted file mode 100644 index 30353640a4cc..000000000000 --- a/test/CodeGen/ARM/Windows/no-arm-mode.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: not llc -mtriple=armv7-windows-itanium -mcpu=cortex-a9 -o /dev/null %s 2>&1 \ -; RUN:   | FileCheck %s -check-prefix CHECK-WIN - -; RUN: not llc -mtriple=armv7-windows-gnu -mcpu=cortex-a9 -o /dev/null %s 2>&1 \ -; RUN:   | FileCheck %s -check-prefix CHECK-GNU - -; CHECK-WIN: does not support ARM mode execution - -; CHECK-GNU: does not support ARM mode execution - diff --git a/test/CodeGen/ARM/Windows/tls.ll b/test/CodeGen/ARM/Windows/tls.ll index 947e29dfa65c..2c38ad3e58f7 100644 --- a/test/CodeGen/ARM/Windows/tls.ll +++ b/test/CodeGen/ARM/Windows/tls.ll @@ -15,11 +15,11 @@ define i32 @f() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -36,11 +36,11 @@ define i32 @e() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -57,11 +57,11 @@ define i32 @d() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -78,11 +78,11 @@ define i32 @c() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -99,11 +99,11 @@ define i32 @b() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -120,11 +120,11 @@ define i16 @a() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -141,11 +141,11 @@ define i8 @Z() {  ; CHECK:      mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 +; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK:      movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index  ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index  ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] -; CHECK:      ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]  ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]  ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll index 4a0835a2c0ca..82b6b11ea4b2 100644 --- a/test/CodeGen/ARM/alloca.ll +++ b/test/CodeGen/ARM/alloca.ll @@ -2,11 +2,11 @@  define void @f(i32 %a) {  entry: -; CHECK: add  r11, sp, #4 +; CHECK: add  r11, sp, #8          %tmp = alloca i8, i32 %a                ; <i8*> [#uses=1]          call void @g( i8* %tmp, i32 %a, i32 1, i32 2, i32 3 )          ret void -; CHECK: sub  sp, r11, #4 +; CHECK: sub  sp, r11, #8  }  declare void @g(i8*, i32, i32, i32, i32) diff --git a/test/CodeGen/ARM/arg-copy-elide.ll b/test/CodeGen/ARM/arg-copy-elide.ll index 739b560b0833..625b57073406 100644 --- a/test/CodeGen/ARM/arg-copy-elide.ll +++ b/test/CodeGen/ARM/arg-copy-elide.ll @@ -31,8 +31,8 @@ entry:  ; CHECK-LABEL: use_arg:  ; CHECK: push {[[csr:[^ ]*]], lr} -; CHECK: ldr [[csr]], [sp, #8]  ; CHECK: add r0, sp, #8 +; CHECK: ldr [[csr]], [sp, #8]  ; CHECK: bl addrof_i32  ; CHECK: mov r0, [[csr]]  ; CHECK: pop {[[csr]], pc} @@ -50,8 +50,8 @@ entry:  ; CHECK: push    {r4, r5, r11, lr}  ; CHECK: sub     sp, sp, #8  ; CHECK: ldr     r4, [sp, #28] -; CHECK: ldr     r5, [sp, #24]  ; CHECK: mov     r0, sp +; CHECK: ldr     r5, [sp, #24]  ; CHECK: str     r4, [sp, #4]  ; CHECK: str     r5, [sp]  ; CHECK: bl      addrof_i64 diff --git a/test/CodeGen/ARM/arm-abi-attr.ll b/test/CodeGen/ARM/arm-abi-attr.ll index 61cb6cefa170..f05e6e788d6f 100644 --- a/test/CodeGen/ARM/arm-abi-attr.ll +++ b/test/CodeGen/ARM/arm-abi-attr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-linux-gnu < %s | FileCheck %s --check-prefix=APCS +; RUN: llc -mtriple=arm-linux-gnu < %s | FileCheck %s --check-prefix=AAPCS  ; RUN: llc -mtriple=arm-linux-gnu -target-abi=apcs < %s | \  ; RUN: FileCheck %s --check-prefix=APCS  ; RUN: llc -mtriple=arm-linux-gnueabi -target-abi=apcs < %s | \ diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 31691e9468c9..af05392c98a5 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -171,8 +171,8 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) {  ;  ; V8-LABEL: test_tst_assessment:  ; V8:       @ BB#0: -; V8-NEXT:    lsls r1, r1, #31  ; V8-NEXT:    and r0, r0, #1 +; V8-NEXT:    lsls r1, r1, #31  ; V8-NEXT:    it ne  ; V8-NEXT:    subne r0, #1  ; V8-NEXT:    bx lr diff --git a/test/CodeGen/ARM/arm-position-independence-jump-table.ll b/test/CodeGen/ARM/arm-position-independence-jump-table.ll index 790b4f41776e..afc2d38be18c 100644 --- a/test/CodeGen/ARM/arm-position-independence-jump-table.ll +++ b/test/CodeGen/ARM/arm-position-independence-jump-table.ll @@ -47,8 +47,8 @@ lab4:  ; CHECK-LABEL: jump_table: -; ARM: lsl     r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2  ; ARM: adr     r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]] +; ARM: lsl     r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2  ; ARM_ABS: ldr     pc, [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]  ; ARM_PC:  ldr     r[[R_OFFSET:[0-9]+]], [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]  ; ARM_PC:  add     pc, r[[R_OFFSET]], r[[R_TAB_BASE]] diff --git a/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll index 1434f40137b5..7007018dd0b2 100644 --- a/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll +++ b/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll @@ -45,15 +45,19 @@ target triple = "armv7--linux-gnueabi"  ; CHECK: @ %while.cond2  ; CHECK: add  ; CHECK-NEXT: cmp r{{[0-1]+}}, #1 -; Set the return value. -; CHECK-NEXT: moveq r0, -; CHECK-NEXT: popeq +; Jump to the return block +; CHECK-NEXT: beq [[RETURN_BLOCK:[.a-zA-Z0-9_]+]]  ;  ; Use the back edge to check we get the label of the loop right.  ; This is to make sure we check the right loop pattern.  ; CHECK:  @ %while.body24.land.rhs14_crit_edge  ; CHECK: cmp r{{[0-9]+}}, #192  ; CHECK-NEXT bhs [[LOOP_HEADER]] +; +; CHECK: [[RETURN_BLOCK]]: +; Set the return value. +; CHECK-NEXT: mov r0, +; CHECK-NEXT: pop  define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) {  entry:    %cmp = icmp sgt i32 %off, -1 diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll index e026bae361e1..a136e44fc196 100644 --- a/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -70,8 +70,8 @@ entry:  ; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]  ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1  ; CHECK-ARMV7-NEXT: beq [[HEAD]] -; CHECK-ARMV7-NEXT: clrex  ; CHECK-ARMV7-NEXT: mov r0, #0 +; CHECK-ARMV7-NEXT: clrex  ; CHECK-ARMV7-NEXT: bx lr  ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: @@ -88,6 +88,6 @@ entry:  ; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]  ; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]  ; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]] -; CHECK-THUMBV7-NEXT: clrex  ; CHECK-THUMBV7-NEXT: movs r0, #0 +; CHECK-THUMBV7-NEXT: clrex  ; CHECK-THUMBV7-NEXT: bx lr diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll index 5f2ba8b109a7..ca9c9ab079db 100644 --- a/test/CodeGen/ARM/bool-ext-inc.ll +++ b/test/CodeGen/ARM/bool-ext-inc.ll @@ -16,8 +16,8 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) {  ; CHECK:       @ BB#0:  ; CHECK-NEXT:    vmov.i16 d16, #0x1  ; CHECK-NEXT:    vmov d17, r0, r1 -; CHECK-NEXT:    vmov.i32 q9, #0x1  ; CHECK-NEXT:    veor d16, d17, d16 +; CHECK-NEXT:    vmov.i32 q9, #0x1  ; CHECK-NEXT:    vmovl.u16 q8, d16  ; CHECK-NEXT:    vand q8, q8, q9  ; CHECK-NEXT:    vmov r0, r1, d16 @@ -31,13 +31,13 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) {  define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {  ; CHECK-LABEL: cmpgt_sext_inc_vec:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    mov r12, sp -; CHECK-NEXT:    vmov d19, r2, r3 -; CHECK-NEXT:    vmov.i32 q10, #0x1 -; CHECK-NEXT:    vld1.64 {d16, d17}, [r12] -; CHECK-NEXT:    vmov d18, r0, r1 -; CHECK-NEXT:    vcge.s32 q8, q8, q9 -; CHECK-NEXT:    vand q8, q8, q10 +; CHECK-NEXT:    vmov d17, r2, r3 +; CHECK-NEXT:    vmov d16, r0, r1 +; CHECK-NEXT:    mov r0, sp +; CHECK-NEXT:    vld1.64 {d18, d19}, [r0] +; CHECK-NEXT:    vcge.s32 q8, q9, q8 +; CHECK-NEXT:    vmov.i32 q9, #0x1 +; CHECK-NEXT:    vand q8, q8, q9  ; CHECK-NEXT:    vmov r0, r1, d16  ; CHECK-NEXT:    vmov r2, r3, d17  ; CHECK-NEXT:    mov pc, lr @@ -50,13 +50,13 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {  define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {  ; CHECK-LABEL: cmpne_sext_inc_vec:  ; CHECK:       @ BB#0: +; CHECK-NEXT:    vmov d17, r2, r3  ; CHECK-NEXT:    mov r12, sp -; CHECK-NEXT:    vmov d19, r2, r3 -; CHECK-NEXT:    vmov.i32 q10, #0x1 -; CHECK-NEXT:    vld1.64 {d16, d17}, [r12] -; CHECK-NEXT:    vmov d18, r0, r1 -; CHECK-NEXT:    vceq.i32 q8, q9, q8 -; CHECK-NEXT:    vand q8, q8, q10 +; CHECK-NEXT:    vld1.64 {d18, d19}, [r12] +; CHECK-NEXT:    vmov d16, r0, r1 +; CHECK-NEXT:    vceq.i32 q8, q8, q9 +; CHECK-NEXT:    vmov.i32 q9, #0x1 +; CHECK-NEXT:    vand q8, q8, q9  ; CHECK-NEXT:    vmov r0, r1, d16  ; CHECK-NEXT:    vmov r2, r3, d17  ; CHECK-NEXT:    mov pc, lr diff --git a/test/CodeGen/ARM/cmpxchg-O0-be.ll b/test/CodeGen/ARM/cmpxchg-O0-be.ll new file mode 100644 index 000000000000..9e9a93e19b6a --- /dev/null +++ b/test/CodeGen/ARM/cmpxchg-O0-be.ll @@ -0,0 +1,26 @@ +; RUN: llc -verify-machineinstrs -mtriple=armebv8-linux-gnueabi -O0 %s -o - | FileCheck %s + +@x = global i64 10, align 8 +@y = global i64 20, align 8 +@z = global i64 20, align 8 + +; CHECK_LABEL:	main: +; CHECK:	ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}} +; CHECK-NEXT:	ldr [[R1]], {{\[}}[[R1]], #4] +; CHECK:	mov [[R4:r[0-9]+]], [[R2]] +; CHECK-NEXT:	mov [[R5:r[0-9]+]], [[R1]] +; CHECK:	ldr [[R2]], {{\[}}[[R1]]{{\]}} +; CHECK-NEXT:	ldr [[R1]], {{\[}}[[R1]], #4] +; CHECK:	mov [[R6:r[0-9]+]], [[R2]] +; CHECK-NEXT:	mov [[R7:r[0-9]+]], [[R1]] + +define arm_aapcs_vfpcc i32 @main() #0 { +entry: +  %retval = alloca i32, align 4 +  store i32 0, i32* %retval, align 4 +  %0 = load i64, i64* @z, align 8 +  %1 = load i64, i64* @x, align 8 +  %2 = cmpxchg i64* @y, i64 %0, i64 %1 seq_cst seq_cst +  %3 = extractvalue { i64, i1 } %2, 1 +  ret i32 0 +} diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll index 0d5681aafbcb..29d97fef0606 100644 --- a/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/test/CodeGen/ARM/cmpxchg-weak.ll @@ -47,12 +47,12 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) {  ; CHECK-NEXT:     strex   [[SUCCESS:r[0-9]+]], r3, [r1]  ; CHECK-NEXT:     cmp     [[SUCCESS]], #0  ; CHECK-NEXT:     bxne    lr -; CHECK-NEXT:     dmb     ish  ; CHECK-NEXT:     mov     r0, #1 +; CHECK-NEXT:     dmb     ish  ; CHECK-NEXT:     bx      lr  ; CHECK-NEXT: [[LDFAILBB]]: -; CHECK-NEXT:     clrex  ; CHECK-NEXT:     mov     r0, #0 +; CHECK-NEXT:     clrex  ; CHECK-NEXT:     bx      lr    ret i1 %success diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll index b9d90249e9f6..b381aecc69a6 100644 --- a/test/CodeGen/ARM/code-placement.ll +++ b/test/CodeGen/ARM/code-placement.ll @@ -38,9 +38,8 @@ entry:    br i1 %0, label %bb5, label %bb.nph15  bb1:                                              ; preds = %bb2.preheader, %bb1 -; CHECK: LBB1_[[BB3:.]]: @ %bb3  ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader -; CHECK: blt LBB1_[[BB3]] +; CHECK: blt LBB1_[[BB3:.]]    %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]    %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]    %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1] @@ -54,7 +53,7 @@ bb1:                                              ; preds = %bb2.preheader, %bb1  bb3:                                              ; preds = %bb1, %bb2.preheader  ; CHECK: LBB1_[[BB1:.]]: @ %bb1  ; CHECK: bne LBB1_[[BB1]] -; CHECK: b LBB1_[[BB3]] +; CHECK: LBB1_[[BB3]]: @ %bb3    %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]    %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]    %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1] diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll index 0b431f47f50b..f825061d1169 100644 --- a/test/CodeGen/ARM/constantfp.ll +++ b/test/CodeGen/ARM/constantfp.ll @@ -5,25 +5,25 @@  ; RUN: llc -mtriple=thumbv7m -mcpu=cortex-m4 %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-NO-XO %s -; RUN: llc -mtriple=thumbv7m -arm-execute-only -mcpu=cortex-m4 %s -o - \ +; RUN: llc -mtriple=thumbv7m -mattr=+execute-only -mcpu=cortex-m4 %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE %s -; RUN: llc -mtriple=thumbv7meb -arm-execute-only -mcpu=cortex-m4 %s -o - \ +; RUN: llc -mtriple=thumbv7meb -mattr=+execute-only -mcpu=cortex-m4 %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE-BE %s -; RUN: llc -mtriple=thumbv7m -arm-execute-only -mcpu=cortex-m4 -relocation-model=ropi %s -o - \ +; RUN: llc -mtriple=thumbv7m -mattr=+execute-only -mcpu=cortex-m4 -relocation-model=ropi %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-XO-ROPI %s  ; RUN: llc -mtriple=thumbv8m.main -mattr=fp-armv8 %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-NO-XO %s -; RUN: llc -mtriple=thumbv8m.main -arm-execute-only -mattr=fp-armv8 %s -o - \ +; RUN: llc -mtriple=thumbv8m.main -mattr=+execute-only -mattr=fp-armv8 %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE %s -; RUN: llc -mtriple=thumbv8m.maineb -arm-execute-only -mattr=fp-armv8 %s -o - \ +; RUN: llc -mtriple=thumbv8m.maineb -mattr=+execute-only -mattr=fp-armv8 %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE-BE %s -; RUN: llc -mtriple=thumbv8m.main -arm-execute-only -mattr=fp-armv8 -relocation-model=ropi %s -o - \ +; RUN: llc -mtriple=thumbv8m.main -mattr=+execute-only -mattr=fp-armv8 -relocation-model=ropi %s -o - \  ; RUN: | FileCheck --check-prefix=CHECK-XO-ROPI %s  define arm_aapcs_vfpcc float @test_vmov_f32() { diff --git a/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/test/CodeGen/ARM/cortex-a57-misched-basic.ll index 2ec50b9d3343..cfbef7bd4293 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-basic.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-basic.ll @@ -8,14 +8,14 @@  ; CHECK:       ********** MI Scheduling **********  ; CHECK:      foo:BB#0 entry -; GENERIC:    SDIV +; GENERIC:    LDRi12  ; GENERIC:    Latency    : 1  ; GENERIC:    EORrr  ; GENERIC:    Latency    : 1 -; GENERIC:    LDRi12 -; GENERIC:    Latency    : 4  ; GENERIC:    ADDrr  ; GENERIC:    Latency    : 1 +; GENERIC:    SDIV +; GENERIC:    Latency    : 0  ; GENERIC:    SUBrr  ; GENERIC:    Latency    : 1 diff --git a/test/CodeGen/ARM/cortexr52-misched-basic.ll b/test/CodeGen/ARM/cortexr52-misched-basic.ll index eb2c29a3a5d1..614157eb0e10 100644 --- a/test/CodeGen/ARM/cortexr52-misched-basic.ll +++ b/test/CodeGen/ARM/cortexr52-misched-basic.ll @@ -12,10 +12,10 @@  ; GENERIC:    Latency    : 1  ; R52_SCHED:  Latency    : 3  ; CHECK:      MLA -; GENERIC:    Latency    : 1 +; GENERIC:    Latency    : 2  ; R52_SCHED:  Latency    : 4  ; CHECK:      SDIV -; GENERIC:    Latency    : 1 +; GENERIC:    Latency    : 0  ; R52_SCHED:  Latency    : 8  ; CHECK:      ** Final schedule for BB#0 ***  ; GENERIC:    EORrr diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll index 7fcc8cba0c8f..0cf87d7a97b7 100644 --- a/test/CodeGen/ARM/ctor_order.ll +++ b/test/CodeGen/ARM/ctor_order.ll @@ -1,7 +1,7 @@  ; RUN: llc < %s -mtriple=arm-apple-darwin  | FileCheck %s -check-prefix=DARWIN  ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic  | FileCheck %s --check-prefix=DARWIN  ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static  | FileCheck %s -check-prefix=DARWIN-STATIC -; RUN: llc < %s -mtriple=arm-linux-gnu     | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-linux-gnu -target-abi=apcs  | FileCheck %s -check-prefix=ELF  ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI  ; DARWIN:      .section	__DATA,__mod_init_func,mod_init_funcs diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll index fb94626ab7dd..c097ade3c846 100644 --- a/test/CodeGen/ARM/ctors_dtors.ll +++ b/test/CodeGen/ARM/ctors_dtors.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -mtriple=arm-apple-darwin  | FileCheck %s -check-prefix=DARWIN -; RUN: llc < %s -mtriple=arm-linux-gnu     | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-linux-gnu -target-abi=apcs  | FileCheck %s -check-prefix=ELF  ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI  ; DARWIN: .section	__DATA,__mod_init_func,mod_init_funcs diff --git a/test/CodeGen/ARM/cttz.ll b/test/CodeGen/ARM/cttz.ll index dacfca505931..cba7be583310 100644 --- a/test/CodeGen/ARM/cttz.ll +++ b/test/CodeGen/ARM/cttz.ll @@ -40,8 +40,8 @@ define i64 @test_i64(i64 %a) {  ; CHECK-LABEL: test_i64:  ; CHECK: rbit  ; CHECK: rbit -; CHECK: cmp  ; CHECK: clz +; CHECK: cmp  ; CHECK: add  ; CHECK: clzne    %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) @@ -81,8 +81,8 @@ define i64 @test_i64_zero_undef(i64 %a) {  ; CHECK-LABEL: test_i64_zero_undef:  ; CHECK: rbit  ; CHECK: rbit -; CHECK: cmp  ; CHECK: clz +; CHECK: cmp  ; CHECK: add  ; CHECK: clzne    %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true) diff --git a/test/CodeGen/ARM/cttz_vector.ll b/test/CodeGen/ARM/cttz_vector.ll index 9480d75db47a..bed644980415 100644 --- a/test/CodeGen/ARM/cttz_vector.ll +++ b/test/CodeGen/ARM/cttz_vector.ll @@ -168,17 +168,17 @@ define void @test_v4i32(<4 x i32>* %p) {  define void @test_v1i64(<1 x i64>* %p) {  ; CHECK-LABEL: test_v1i64: -; CHECK: vldr		[[D1:d[0-9]+]], [r0]  ; CHECK: vmov.i32	[[D2:d[0-9]+]], #0x0 +; CHECK: vldr		[[D1:d[0-9]+]], [r0]  ; CHECK: vmov.i64	[[D3:d[0-9]+]], #0xffffffffffffffff  ; CHECK: vsub.i64	[[D2]], [[D2]], [[D1]] -; CHECK: vand		[[D1]], [[D1]], [[D2]] -; CHECK: vadd.i64	[[D1]], [[D1]], [[D3]] -; CHECK: vcnt.8		[[D1]], [[D1]] -; CHECK: vpaddl.u8	[[D1]], [[D1]] -; CHECK: vpaddl.u16	[[D1]], [[D1]] -; CHECK: vpaddl.u32	[[D1]], [[D1]] -; CHECK: vstr		[[D1]], [r0] +; CHECK: vand		[[D2]], [[D1]], [[D2]] +; CHECK: vadd.i64	[[D2]], [[D2]], [[D3]] +; CHECK: vcnt.8		[[D2]], [[D2]] +; CHECK: vpaddl.u8	[[D2]], [[D2]] +; CHECK: vpaddl.u16	[[D2]], [[D2]] +; CHECK: vpaddl.u32	[[D2]], [[D2]] +; CHECK: vstr		[[D2]], [r0]    %a = load <1 x i64>, <1 x i64>* %p    %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)    store <1 x i64> %tmp, <1 x i64>* %p @@ -187,17 +187,17 @@ define void @test_v1i64(<1 x i64>* %p) {  define void @test_v2i64(<2 x i64>* %p) {  ; CHECK-LABEL: test_v2i64: -; CHECK: vld1.64	{[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]  ; CHECK: vmov.i32	[[Q2:q[0-9]+]], #0x0 +; CHECK: vld1.64	{[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]  ; CHECK: vmov.i64	[[Q3:q[0-9]+]], #0xffffffffffffffff  ; CHECK: vsub.i64	[[Q2]], [[Q2]], [[Q1:q[0-9]+]] -; CHECK: vand		[[Q1]], [[Q1]], [[Q2]] -; CHECK: vadd.i64	[[Q1]], [[Q1]], [[Q3]] -; CHECK: vcnt.8		[[Q1]], [[Q1]] -; CHECK: vpaddl.u8	[[Q1]], [[Q1]] -; CHECK: vpaddl.u16	[[Q1]], [[Q1]] -; CHECK: vpaddl.u32	[[Q1]], [[Q1]] -; CHECK: vst1.64	{[[D1]], [[D2]]}, [r0] +; CHECK: vand		[[Q2]], [[Q1]], [[Q2]] +; CHECK: vadd.i64	[[Q2]], [[Q2]], [[Q3]] +; CHECK: vcnt.8		[[Q2]], [[Q2]] +; CHECK: vpaddl.u8	[[Q2]], [[Q2]] +; CHECK: vpaddl.u16	[[Q2]], [[Q2]] +; CHECK: vpaddl.u32	[[Q2]], [[Q2]] +; CHECK: vst1.64	{d{{[0-9]+}}, d{{[0-9]+}}}, [r0]    %a = load <2 x i64>, <2 x i64>* %p    %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)    store <2 x i64> %tmp, <2 x i64>* %p @@ -346,17 +346,17 @@ define void @test_v4i32_zero_undef(<4 x i32>* %p) {  define void @test_v1i64_zero_undef(<1 x i64>* %p) {  ; CHECK-LABEL: test_v1i64_zero_undef: -; CHECK: vldr		[[D1:d[0-9]+]], [r0]  ; CHECK: vmov.i32	[[D2:d[0-9]+]], #0x0 +; CHECK: vldr		[[D1:d[0-9]+]], [r0]  ; CHECK: vmov.i64	[[D3:d[0-9]+]], #0xffffffffffffffff  ; CHECK: vsub.i64	[[D2]], [[D2]], [[D1]] -; CHECK: vand		[[D1]], [[D1]], [[D2]] -; CHECK: vadd.i64	[[D1]], [[D1]], [[D3]] -; CHECK: vcnt.8		[[D1]], [[D1]] -; CHECK: vpaddl.u8	[[D1]], [[D1]] -; CHECK: vpaddl.u16	[[D1]], [[D1]] -; CHECK: vpaddl.u32	[[D1]], [[D1]] -; CHECK: vstr		[[D1]], [r0] +; CHECK: vand		[[D2]], [[D1]], [[D2]] +; CHECK: vadd.i64	[[D2]], [[D2]], [[D3]] +; CHECK: vcnt.8		[[D2]], [[D2]] +; CHECK: vpaddl.u8	[[D2]], [[D2]] +; CHECK: vpaddl.u16	[[D2]], [[D2]] +; CHECK: vpaddl.u32	[[D2]], [[D2]] +; CHECK: vstr		[[D2]], [r0]    %a = load <1 x i64>, <1 x i64>* %p    %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)    store <1 x i64> %tmp, <1 x i64>* %p @@ -365,17 +365,17 @@ define void @test_v1i64_zero_undef(<1 x i64>* %p) {  define void @test_v2i64_zero_undef(<2 x i64>* %p) {  ; CHECK-LABEL: test_v2i64_zero_undef: -; CHECK: vld1.64	{[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]  ; CHECK: vmov.i32	[[Q2:q[0-9]+]], #0x0 +; CHECK: vld1.64	{[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]  ; CHECK: vmov.i64	[[Q3:q[0-9]+]], #0xffffffffffffffff  ; CHECK: vsub.i64	[[Q2]], [[Q2]], [[Q1:q[0-9]+]] -; CHECK: vand		[[Q1]], [[Q1]], [[Q2]] -; CHECK: vadd.i64	[[Q1]], [[Q1]], [[Q3]] -; CHECK: vcnt.8		[[Q1]], [[Q1]] -; CHECK: vpaddl.u8	[[Q1]], [[Q1]] -; CHECK: vpaddl.u16	[[Q1]], [[Q1]] -; CHECK: vpaddl.u32	[[Q1]], [[Q1]] -; CHECK: vst1.64	{[[D1]], [[D2]]}, [r0] +; CHECK: vand		[[Q2]], [[Q1]], [[Q2]] +; CHECK: vadd.i64	[[Q2]], [[Q2]], [[Q3]] +; CHECK: vcnt.8		[[Q2]], [[Q2]] +; CHECK: vpaddl.u8	[[Q2]], [[Q2]] +; CHECK: vpaddl.u16	[[Q2]], [[Q2]] +; CHECK: vpaddl.u32	[[Q2]], [[Q2]] +; CHECK: vst1.64	{d{{[0-9]+}}, d{{[0-9]+}}}, [r0]    %a = load <2 x i64>, <2 x i64>* %p    %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)    store <2 x i64> %tmp, <2 x i64>* %p diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll index 6a5aa12ac5a6..6a66c5f197ef 100644 --- a/test/CodeGen/ARM/cxx-tlscc.ll +++ b/test/CodeGen/ARM/cxx-tlscc.ll @@ -26,7 +26,7 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)  ; THUMB-LABEL: _ZTW2sg  ; THUMB: push {{.*}}lr  ; THUMB: blx -; THUMB: bne [[TH_end:.?LBB0_[0-9]+]] +; THUMB: bne{{(.w)?}} [[TH_end:.?LBB0_[0-9]+]]  ; THUMB: blx  ; THUMB: tlv_atexit  ; THUMB: [[TH_end]]: diff --git a/test/CodeGen/ARM/execute-only-big-stack-frame.ll b/test/CodeGen/ARM/execute-only-big-stack-frame.ll index 0fe67f9863a5..24c6a06d6af1 100644 --- a/test/CodeGen/ARM/execute-only-big-stack-frame.ll +++ b/test/CodeGen/ARM/execute-only-big-stack-frame.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -mtriple=thumbv7m -arm-execute-only -O0 %s -o - \ +; RUN: llc < %s -mtriple=thumbv7m -mattr=+execute-only -O0 %s -o - \  ; RUN:  | FileCheck --check-prefix=CHECK-SUBW-ADDW %s -; RUN: llc < %s -mtriple=thumbv8m.base -arm-execute-only -O0 %s -o - \ +; RUN: llc < %s -mtriple=thumbv8m.base -mattr=+execute-only -O0 %s -o - \  ; RUN:  | FileCheck --check-prefix=CHECK-MOVW-MOVT-ADD %s -; RUN: llc < %s -mtriple=thumbv8m.main -arm-execute-only -O0 %s -o - \ +; RUN: llc < %s -mtriple=thumbv8m.main -mattr=+execute-only -O0 %s -o - \  ; RUN:  | FileCheck --check-prefix=CHECK-SUBW-ADDW %s  define i8 @test_big_stack_frame() { diff --git a/test/CodeGen/ARM/execute-only-section.ll b/test/CodeGen/ARM/execute-only-section.ll index 6e1973cd0f14..a3313d8c2f73 100644 --- a/test/CodeGen/ARM/execute-only-section.ll +++ b/test/CodeGen/ARM/execute-only-section.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=thumbv7m -arm-execute-only %s -o - | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8m.base -arm-execute-only %s -o - | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8m.main -arm-execute-only %s -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7m -mattr=+execute-only %s -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8m.base -mattr=+execute-only %s -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8m.main -mattr=+execute-only %s -o - | FileCheck %s  ; CHECK:     .section .text,"axy",%progbits,unique,0  ; CHECK-NOT: .section diff --git a/test/CodeGen/ARM/execute-only.ll b/test/CodeGen/ARM/execute-only.ll index 1f9e8bf2813c..f8c3d279573b 100644 --- a/test/CodeGen/ARM/execute-only.ll +++ b/test/CodeGen/ARM/execute-only.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=thumbv8m.base-eabi -arm-execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2BASE %s -; RUN: llc -mtriple=thumbv7m-eabi      -arm-execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s -; RUN: llc -mtriple=thumbv8m.main-eabi -arm-execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s +; RUN: llc -mtriple=thumbv8m.base-eabi -mattr=+execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2BASE %s +; RUN: llc -mtriple=thumbv7m-eabi      -mattr=+execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s +; RUN: llc -mtriple=thumbv8m.main-eabi -mattr=+execute-only %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-T2 %s  @var = global i32 0 diff --git a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll index 9148ac109ae3..257d99d11928 100644 --- a/test/CodeGen/ARM/fp16-promote.ll +++ b/test/CodeGen/ARM/fp16-promote.ll @@ -687,8 +687,8 @@ define void @test_maxnan(half* %p) #0 {  ; CHECK-LIBCALL: bl __aeabi_h2f  ; CHECK-LIBCALL: bl __aeabi_h2f  ; CHECK-VFP-LIBCALL: vbsl -; CHECK-NOVFP: bic  ; CHECK-NOVFP: and +; CHECK-NOVFP: bic  ; CHECK-NOVFP: orr  ; CHECK-LIBCALL: bl __aeabi_f2h  define void @test_copysign(half* %p, half* %q) #0 { @@ -818,25 +818,24 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {  ; CHECK-ALL-LABEL: test_insertelement:  ; CHECK-ALL: sub sp, sp, #8  ; CHECK-ALL: ldrh -; CHECK-ALL: strh  ; CHECK-ALL: ldrh -; CHECK-ALL: strh  ; CHECK-ALL: ldrh -; CHECK-ALL: strh  ; CHECK-ALL: ldrh -; CHECK-ALL: strh -; CHECK-ALL: mov +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: mov  ; CHECK-ALL-DAG: ldrh  ; CHECK-ALL-DAG: orr -; CHECK-ALL: strh -; CHECK-ALL: ldrh -; CHECK-ALL: strh -; CHECK-ALL: ldrh -; CHECK-ALL: strh -; CHECK-ALL: ldrh -; CHECK-ALL: strh -; CHECK-ALL: ldrh -; CHECK-ALL: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh  ; CHECK-ALL: add sp, sp, #8  define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {    %a = load half, half* %p, align 2 diff --git a/test/CodeGen/ARM/fp16-v3.ll b/test/CodeGen/ARM/fp16-v3.ll index a37f71d9ba88..e84fee2c2e1b 100644 --- a/test/CodeGen/ARM/fp16-v3.ll +++ b/test/CodeGen/ARM/fp16-v3.ll @@ -11,8 +11,8 @@ target triple = "armv7a--none-eabi"  ; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]  ; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]  ; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]] -; CHECK-NEXT: uxth [[RREG2:r[0-9]+]], [[RREG1]] -; CHECK-NEXT: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16 +; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]] +; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16  ; CHECK-DAG: strh [[RREG1]], [r0, #4]  ; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]  ; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32] diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll index e0d2b7cffb44..ed443a1814e6 100644 --- a/test/CodeGen/ARM/ifcvt7.ll +++ b/test/CodeGen/ARM/ifcvt7.ll @@ -5,8 +5,6 @@  define fastcc i32 @CountTree(%struct.quad_struct* %tree) {  ; CHECK: cmpeq -; CHECK: moveq -; CHECK: popeq  entry:  	br label %tailrecurse diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index 74117d3896bd..a633c0291c60 100644 --- a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -55,8 +55,8 @@ define void @i24_and_or(i24* %a) {  define void @i24_insert_bit(i24* %a, i1 zeroext %bit) {  ; LE-LABEL: i24_insert_bit:  ; LE:       @ BB#0: -; LE-NEXT:    ldrh r2, [r0]  ; LE-NEXT:    mov r3, #255 +; LE-NEXT:    ldrh r2, [r0]  ; LE-NEXT:    orr r3, r3, #57088  ; LE-NEXT:    and r2, r2, r3  ; LE-NEXT:    orr r1, r2, r1, lsl #13 @@ -99,8 +99,8 @@ define void @i56_or(i56* %a) {  ; BE-NEXT:    orr r2, r3, r2, lsl #8  ; BE-NEXT:    orr r2, r2, r12, lsl #24  ; BE-NEXT:    orr r2, r2, #384 -; BE-NEXT:    lsr r3, r2, #8  ; BE-NEXT:    strb r2, [r1, #2] +; BE-NEXT:    lsr r3, r2, #8  ; BE-NEXT:    strh r3, [r1]  ; BE-NEXT:    bic r1, r12, #255  ; BE-NEXT:    orr r1, r1, r2, lsr #24 @@ -127,8 +127,8 @@ define void @i56_and_or(i56* %a) {  ; BE-NEXT:    mov r3, #128  ; BE-NEXT:    ldrh r2, [r1, #4]!  ; BE-NEXT:    strb r3, [r1, #2] -; BE-NEXT:    lsl r2, r2, #8  ; BE-NEXT:    ldr r12, [r0] +; BE-NEXT:    lsl r2, r2, #8  ; BE-NEXT:    orr r2, r2, r12, lsl #24  ; BE-NEXT:    orr r2, r2, #384  ; BE-NEXT:    lsr r3, r2, #8 diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 90defad43a7d..a3ec2a7f3e77 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -56,9 +56,11 @@ L2:                                               ; preds = %L3, %bb2  L1:                                               ; preds = %L2, %bb2    %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]  ; ARM-LABEL: %L1 +; ARM: ldr [[R_NEXTADDR:r[0-9]+]], LCPI  ; ARM: ldr [[R1:r[0-9]+]], LCPI +; ARM: add [[R_NEXTADDR_b:r[0-9]+]], pc, [[R_NEXTADDR]]  ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] -; ARM: str [[R1b]] +; ARM: str [[R1b]], {{\[}}[[R_NEXTADDR_b]]]  ; THUMB-LABEL: %L1  ; THUMB: ldr [[R2:r[0-9]+]], LCPI diff --git a/test/CodeGen/ARM/jump-table-islands.ll b/test/CodeGen/ARM/jump-table-islands.ll index 6b4f174c0928..755ca30199ad 100644 --- a/test/CodeGen/ARM/jump-table-islands.ll +++ b/test/CodeGen/ARM/jump-table-islands.ll @@ -13,7 +13,7 @@ define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {  ; CHECK:   .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]  ; CHECK: [[SKIP_TABLE]]: -; CHECK:   add pc, {{r[0-9]+}}, {{r[0-9]+}} +; CHECK:   add pc, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}    br i1 %tst, label %simple, label %complex  simple: diff --git a/test/CodeGen/ARM/jump-table-tbh.ll b/test/CodeGen/ARM/jump-table-tbh.ll index 2da8a5fafc40..b3ee68ea0758 100644 --- a/test/CodeGen/ARM/jump-table-tbh.ll +++ b/test/CodeGen/ARM/jump-table-tbh.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumbv7m-linux-gnu -o - %s | FileCheck %s --check-prefix=T2 -; RUN: llc -mtriple=thumbv6m-linux-gnu -o - %s | FileCheck %s --check-prefix=T1 +; RUN: llc -mtriple=thumbv7m-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=T2 +; RUN: llc -mtriple=thumbv6m-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=T1  declare void @foo(double)  declare i32 @llvm.arm.space(i32, i32) @@ -10,7 +10,7 @@ define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) {  ; T2-LABEL: test_tbh:  ; T2: [[ANCHOR:.LCPI[0-9_]+]]:  ; T2: tbh [pc, r{{[0-9]+}}, lsl #1] -; T2-NEXT: @ BB#1 +; T2-NEXT: @ BB#{{[0-9]+}}  ; T2-NEXT: LJTI  ; T2-NEXT: .short	(.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2  ; T2-NEXT: .short	(.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2 diff --git a/test/CodeGen/ARM/ldm-stm-i256.ll b/test/CodeGen/ARM/ldm-stm-i256.ll index 7b4151dabf6d..151c42e0e158 100644 --- a/test/CodeGen/ARM/ldm-stm-i256.ll +++ b/test/CodeGen/ARM/ldm-stm-i256.ll @@ -17,22 +17,24 @@ entry:    %add6 = add nsw i256 %or, %d    store i256 %add6, i256* %b, align 8    ret void -  ; CHECK-DAG: ldm r3    ; CHECK-DAG: ldm r2 -  ; CHECK-DAG: ldr {{.*}}, [r3, #20] +  ; CHECK-DAG: ldr {{.*}}, [r3] +  ; CHECK-DAG: ldr {{.*}}, [r3, #4] +  ; CHECK-DAG: ldr {{.*}}, [r3, #8] +  ; CHECK-DAG: ldr {{.*}}, [r3, #12]    ; CHECK-DAG: ldr {{.*}}, [r3, #16] -  ; CHECK-DAG: ldr {{.*}}, [r3, #28] +  ; CHECK-DAG: ldr {{.*}}, [r3, #20]    ; CHECK-DAG: ldr {{.*}}, [r3, #24] +  ; CHECK-DAG: ldr {{.*}}, [r3, #28]    ; CHECK-DAG: ldr {{.*}}, [r2, #20] -  ; CHECK-DAG: ldr {{.*}}, [r2, #16] -  ; CHECK-DAG: ldr {{.*}}, [r2, #28]    ; CHECK-DAG: ldr {{.*}}, [r2, #24] -  ; CHECK-DAG: stmib r0 -  ; CHECK-DAG: str {{.*}}, [r0] +  ; CHECK-DAG: ldr {{.*}}, [r2, #28] +  ; CHECK-DAG: stm r0 +  ; CHECK-DAG: str {{.*}}, [r0, #20]    ; CHECK-DAG: str {{.*}}, [r0, #24]    ; CHECK-DAG: str {{.*}}, [r0, #28] -  ; CHECK-DAG: str {{.*}}, [r1] -  ; CHECK-DAG: stmib r1 +  ; CHECK-DAG: stm r1 +  ; CHECK-DAG: str {{.*}}, [r1, #20]    ; CHECK-DAG: str {{.*}}, [r1, #24]    ; CHECK-DAG: str {{.*}}, [r1, #28]  } diff --git a/test/CodeGen/ARM/legalize-unaligned-load.ll b/test/CodeGen/ARM/legalize-unaligned-load.ll index eb4e942f0742..ccf93c3ef55e 100644 --- a/test/CodeGen/ARM/legalize-unaligned-load.ll +++ b/test/CodeGen/ARM/legalize-unaligned-load.ll @@ -10,7 +10,7 @@  ; CHECK-NOT: str  ; CHECK: ldr  ; CHECK: str -; CHECK: bx +; CHECK: {{bx|pop.*pc}}  define i32 @get_set_complex({ float, float }* noalias nocapture %retptr,                              { i8*, i32 }** noalias nocapture readnone %excinfo,                              i8* noalias nocapture readnone %env, diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll index f09167ed9e78..1fbc3f2c0838 100644 --- a/test/CodeGen/ARM/long-setcc.ll +++ b/test/CodeGen/ARM/long-setcc.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi < %s | FileCheck %s  define i1 @t1(i64 %x) {  	%B = icmp slt i64 %x, 0 diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 3ec5fa41aa6f..cf8396db9db5 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -28,15 +28,15 @@ define i32 @f1(i64 %x, i64 %y) {  define i32 @f2(i64 %x, i64 %y) {  ; CHECK-LABEL: f2: -; CHECK-LE:      lsr{{.*}}r2 -; CHECK-LE-NEXT: rsb     r3, r2, #32 +; CHECK-LE:      rsb     r3, r2, #32 +; CHECK-LE-NEXT: lsr{{.*}}r2  ; CHECK-LE-NEXT: sub     r2, r2, #32  ; CHECK-LE-NEXT: orr     r0, r0, r1, lsl r3  ; CHECK-LE-NEXT: cmp     r2, #0  ; CHECK-LE-NEXT: asrge   r0, r1, r2 -; CHECK-BE:      lsr{{.*}}r3 -; CHECK-BE-NEXT: rsb     r2, r3, #32 +; CHECK-BE:      rsb     r2, r3, #32 +; CHECK-BE-NEXT: lsr{{.*}}r3  ; CHECK-BE-NEXT: orr     r1, r1, r0, lsl r2  ; CHECK-BE-NEXT: sub     r2, r3, #32  ; CHECK-BE-NEXT: cmp     r2, #0 @@ -49,15 +49,15 @@ define i32 @f2(i64 %x, i64 %y) {  define i32 @f3(i64 %x, i64 %y) {  ; CHECK-LABEL: f3: -; CHECK-LE:      lsr{{.*}}r2 -; CHECK-LE-NEXT: rsb     r3, r2, #32 +; CHECK-LE:      rsb     r3, r2, #32 +; CHECK-LE-NEXT: lsr{{.*}}r2  ; CHECK-LE-NEXT: sub     r2, r2, #32  ; CHECK-LE-NEXT: orr     r0, r0, r1, lsl r3  ; CHECK-LE-NEXT: cmp     r2, #0  ; CHECK-LE-NEXT: lsrge   r0, r1, r2 -; CHECK-BE:      lsr{{.*}}r3 -; CHECK-BE-NEXT: rsb     r2, r3, #32 +; CHECK-BE:      rsb     r2, r3, #32 +; CHECK-BE-NEXT: lsr{{.*}}r3  ; CHECK-BE-NEXT: orr     r1, r1, r0, lsl r2  ; CHECK-BE-NEXT: sub     r2, r3, #32  ; CHECK-BE-NEXT: cmp     r2, #0 diff --git a/test/CodeGen/ARM/misched-fusion-aes.ll b/test/CodeGen/ARM/misched-fusion-aes.ll index d3558ab4abb0..483f26cc8e00 100644 --- a/test/CodeGen/ARM/misched-fusion-aes.ll +++ b/test/CodeGen/ARM/misched-fusion-aes.ll @@ -74,15 +74,16 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]  ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]] +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}  ; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]  ; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]  ; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]] +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}  ; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]] -; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}  ; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]  ; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} @@ -159,15 +160,16 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]  ; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]] +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}  ; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]  ; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]  ; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]] +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}  ; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]] -; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}  ; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}  ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]]  ; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} diff --git a/test/CodeGen/ARM/select_const.ll b/test/CodeGen/ARM/select_const.ll index 48fe572bf8a7..23de9c35a5b8 100644 --- a/test/CodeGen/ARM/select_const.ll +++ b/test/CodeGen/ARM/select_const.ll @@ -281,16 +281,16 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {  ; CHECK:       @ BB#0:  ; CHECK-NEXT:    .save {r4, lr}  ; CHECK-NEXT:    push {r4, lr} -; CHECK-NEXT:    ands r12, r0, #1  ; CHECK-NEXT:    mov lr, #1 +; CHECK-NEXT:    ands r12, r0, #1  ; CHECK-NEXT:    mov r0, #23 -; CHECK-NEXT:    eor r3, r3, #1  ; CHECK-NEXT:    orr lr, lr, #65536  ; CHECK-NEXT:    mvnne r0, #3 -; CHECK-NEXT:    movne r12, #1  ; CHECK-NEXT:    and r4, r0, lr -; CHECK-NEXT:    eor r2, r2, lr +; CHECK-NEXT:    movne r12, #1  ; CHECK-NEXT:    subs r0, r4, #1 +; CHECK-NEXT:    eor r2, r2, lr +; CHECK-NEXT:    eor r3, r3, #1  ; CHECK-NEXT:    sbc r1, r12, #0  ; CHECK-NEXT:    orrs r2, r2, r3  ; CHECK-NEXT:    movne r0, r4 diff --git a/test/CodeGen/ARM/shift-i64.ll b/test/CodeGen/ARM/shift-i64.ll index 12cc5fbe03e4..3644afa17ca4 100644 --- a/test/CodeGen/ARM/shift-i64.ll +++ b/test/CodeGen/ARM/shift-i64.ll @@ -29,8 +29,8 @@ define i64 @test_shl(i64 %val, i64 %amt) {  ; Explanation for lshr is pretty much the reverse of shl.  define i64 @test_lshr(i64 %val, i64 %amt) {  ; CHECK-LABEL: test_lshr: -; CHECK: lsr r0, r0, r2  ; CHECK: rsb [[REVERSE_SHIFT:.*]], r2, #32 +; CHECK: lsr r0, r0, r2  ; CHECK: orr r0, r0, r1, lsl [[REVERSE_SHIFT]]  ; CHECK: sub [[EXTRA_SHIFT:.*]], r2, #32  ; CHECK: cmp [[EXTRA_SHIFT]], #0 diff --git a/test/CodeGen/ARM/ssp-data-layout.ll b/test/CodeGen/ARM/ssp-data-layout.ll index 92fa0809ed2d..39c279eb90d4 100644 --- a/test/CodeGen/ARM/ssp-data-layout.ll +++ b/test/CodeGen/ARM/ssp-data-layout.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -o - | FileCheck %s +; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -target-abi=apcs -o - | FileCheck %s  ;  This test is fairly fragile.  The goal is to ensure that "large" stack  ;  objects are allocated closest to the stack protector (i.e., farthest away   ;  from the Stack Pointer.)  In standard SSP mode this means that large (>= diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll index 4b8b4c6bca72..1c6c05de2579 100644 --- a/test/CodeGen/ARM/str_pre-2.ll +++ b/test/CodeGen/ARM/str_pre-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=armv6-linux-gnu -target-abi=apcs | FileCheck %s  @b = external global i64* diff --git a/test/CodeGen/ARM/swifterror.ll b/test/CodeGen/ARM/swifterror.ll index 3fd57c592bfb..b02adf7912b5 100644 --- a/test/CodeGen/ARM/swifterror.ll +++ b/test/CodeGen/ARM/swifterror.ll @@ -420,10 +420,10 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {  ; CHECK-ARMV7-DAG:  str     r8, [s[[STK1:.*]]]  ; CHECK-ARMV7-DAG:  str     r10, [s[[STK2:.*]]]  ; Store arguments. -; CHECK-ARMV7:  mov     r6, r3 -; CHECK-ARMV7:  mov     r4, r2 -; CHECK-ARMV7:  mov     r11, r1 -; CHECK-ARMV7:  mov     r5, r0 +; CHECK-ARMV7-DAG:  mov     r6, r3 +; CHECK-ARMV7-DAG:  mov     r4, r2 +; CHECK-ARMV7-DAG:  mov     r11, r1 +; CHECK-ARMV7-DAG:  mov     r5, r0  ; Setup call.  ; CHECK-ARMV7:  mov     r0, #1  ; CHECK-ARMV7:  mov     r1, #2 @@ -435,10 +435,10 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {  ; Restore original arguments.  ; CHECK-ARMV7-DAG:  ldr     r10, [s[[STK2]]]  ; CHECK-ARMV7-DAG:  ldr     r8, [s[[STK1]]] -; CHECK-ARMV7:  mov     r0, r5 -; CHECK-ARMV7:  mov     r1, r11 -; CHECK-ARMV7:  mov     r2, r4 -; CHECK-ARMV7:  mov     r3, r6 +; CHECK-ARMV7-DAG:  mov     r0, r5 +; CHECK-ARMV7-DAG:  mov     r1, r11 +; CHECK-ARMV7-DAG:  mov     r2, r4 +; CHECK-ARMV7-DAG:  mov     r3, r6  ; CHECK-ARMV7:  bl      _params_in_reg2  ; CHECK-ARMV7:  pop     {r4, r5, r6, r7, r10, r11, pc}  define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) { @@ -469,25 +469,25 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e  ; CHECK-ARMV7:  mov     r8, #0  ; CHECK-ARMV7:  bl      _params_in_reg2  ; Restore original arguments. -; CHECK-ARMV7:  ldr     r3, [s[[STK2]]] -; CHECK-ARMV7:  ldr     r10, [s[[STK1]]] +; CHECK-ARMV7-DAG:  ldr     r3, [s[[STK2]]] +; CHECK-ARMV7-DAG:  ldr     r10, [s[[STK1]]]  ; Store %error_ptr_ref; -; CHECK-ARMV7:  str     r8, [s[[STK3:.*]]] +; CHECK-ARMV7-DAG:  str     r8, [s[[STK3:.*]]]  ; Restore original arguments. -; CHECK-ARMV7:  mov     r0, r5 -; CHECK-ARMV7:  mov     r1, r11 -; CHECK-ARMV7:  mov     r2, r4 -; CHECK-ARMV7:  mov     r8, r6 +; CHECK-ARMV7-DAG:  mov     r0, r5 +; CHECK-ARMV7-DAG:  mov     r1, r11 +; CHECK-ARMV7-DAG:  mov     r2, r4 +; CHECK-ARMV7-DAG:  mov     r8, r6  ; CHECK-ARMV7:  bl      _params_and_return_in_reg2  ; Store swifterror return %err; -; CHECK-ARMV7:  str     r8, [s[[STK1]]] +; CHECK-ARMV7-DAG:  str     r8, [s[[STK1]]]  ; Load swifterror value %error_ptr_ref. -; CHECK-ARMV7:  ldr     r8, [s[[STK3]]] +; CHECK-ARMV7-DAG:  ldr     r8, [s[[STK3]]]  ; Save return values. -; CHECK-ARMV7:  mov     r4, r0 -; CHECK-ARMV7:  mov     r5, r1 -; CHECK-ARMV7:  mov     r6, r2 -; CHECK-ARMV7:  mov     r11, r3 +; CHECK-ARMV7-DAG:  mov     r4, r0 +; CHECK-ARMV7-DAG:  mov     r5, r1 +; CHECK-ARMV7-DAG:  mov     r6, r2 +; CHECK-ARMV7-DAG:  mov     r11, r3  ; Setup call.  ; CHECK-ARMV7:  mov     r0, #1  ; CHECK-ARMV7:  mov     r1, #2 @@ -496,12 +496,12 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e  ; CHECK-ARMV7:  mov     r10, #0  ; CHECK-ARMV7:  bl      _params_in_reg2  ; Load swifterror %err; -; CHECK-ARMV7:  ldr     r8, [s[[STK1]]] +; CHECK-ARMV7-DAG:  ldr     r8, [s[[STK1]]]  ; Restore return values for returning. -; CHECK-ARMV7:  mov     r0, r4 -; CHECK-ARMV7:  mov     r1, r5 -; CHECK-ARMV7:  mov     r2, r6 -; CHECK-ARMV7:  mov     r3, r11 +; CHECK-ARMV7-DAG:  mov     r0, r4 +; CHECK-ARMV7-DAG:  mov     r1, r5 +; CHECK-ARMV7-DAG:  mov     r2, r6 +; CHECK-ARMV7-DAG:  mov     r3, r11  ; CHECK-ARMV7:  pop     {r4, r5, r6, r7, r10, r11, pc}  define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {    %error_ptr_ref = alloca swifterror %swift_error*, align 8 diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll index aaefc0a14863..6d93869ec10f 100644 --- a/test/CodeGen/ARM/thumb2-it-block.ll +++ b/test/CodeGen/ARM/thumb2-it-block.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 < %s | FileCheck %s +; RUN: llc -mtriple=thumbv8 < %s | FileCheck %s  ; PR11107  define i32 @test(i32 %a, i32 %b) { diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index c39c939d6c95..1e68ff13699a 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -162,8 +162,8 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {  ; rdar://7923010  define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {  ;CHECK-LABEL: vcgt_zext: -;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1 -;CHECK: vcgt.f32 [[Q1:q[0-9]+]] +;CHECK-DAG: vmov.i32 [[Q0:q[0-9]+]], #0x1 +;CHECK-DAG: vcgt.f32 [[Q1:q[0-9]+]]  ;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]  	%tmp1 = load <4 x float>, <4 x float>* %A  	%tmp2 = load <4 x float>, <4 x float>* %B diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index 2ef2a0697ec9..8623d2c164ba 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -237,14 +237,14 @@ entry:  ; illegal type to a legal type.  define <2 x i8> @test_truncate(<2 x i128> %in) {  ; CHECK-LABEL: test_truncate: -; CHECK: mov [[BASE:r[0-9]+]], sp -; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32] -; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4 -; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]  ; REG2 Should map on the same Q register as REG1, i.e., REG2 = REG1 - 1, but we  ; cannot express that. -; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0 +; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r0 +; CHECK-NEXT: mov [[BASE:r[0-9]+]], sp +; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32] +; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4  ; CHECK-NEXT: vmov.32 [[REG2]][1], r1 +; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]  ; The Q register used here should match floor(REG1/2), but we cannot express that.  ; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}  ; CHECK-NEXT: vmov r0, r1, [[RES]] diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 5742dc314978..5b524145be76 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -182,9 +182,9 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {  ; CHECK-LABEL: test_interleaved:  ; CHECK:       @ BB#0:  ; CHECK-NEXT:    vld1.64 {d16, d17}, [r0] -; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]  ; CHECK-NEXT:    vext.16 d16, d16, d17, #3  ; CHECK-NEXT:    vorr d17, d16, d16 +; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]  ; CHECK-NEXT:    vuzp.16 d16, d17  ; CHECK-NEXT:    vzip.16 d16, d18  ; CHECK-NEXT:    vmov r0, r1, d16 @@ -217,16 +217,16 @@ define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {  ; CHECK-LABEL: test_multisource:  ; CHECK:       @ BB#0:  ; CHECK-NEXT:    mov r1, r0 -; CHECK-NEXT:    add r2, r0, #32 -; CHECK-NEXT:    add r0, r0, #48 +; CHECK-NEXT:    add r2, r0, #48 +; CHECK-NEXT:    add r0, r0, #32  ; CHECK-NEXT:    vld1.16 {d16, d17}, [r1:128]! -; CHECK-NEXT:    vld1.64 {d20, d21}, [r2:128] -; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128] -; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128] +; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]  ; CHECK-NEXT:    vorr d24, d20, d20 +; CHECK-NEXT:    vld1.64 {d18, d19}, [r2:128] +; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128]  ; CHECK-NEXT:    vzip.16 d24, d18 -; CHECK-NEXT:    vext.16 d18, d20, d24, #2  ; CHECK-NEXT:    vtrn.16 q8, q11 +; CHECK-NEXT:    vext.16 d18, d20, d24, #2  ; CHECK-NEXT:    vext.16 d16, d18, d16, #2  ; CHECK-NEXT:    vext.16 d16, d16, d16, #2  ; CHECK-NEXT:    vmov r0, r1, d16 @@ -259,24 +259,24 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {  define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {  ; CHECK-LABEL: test_illegal:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    vld1.64 {d16, d17}, [r0] -; CHECK-NEXT:    vld1.64 {d18, d19}, [r1] -; CHECK-NEXT:    vmov.u16 r1, d16[0] -; CHECK-NEXT:    vmov.u16 r0, d17[3] -; CHECK-NEXT:    vorr d22, d16, d16 -; CHECK-NEXT:    vorr d23, d16, d16 -; CHECK-NEXT:    vmov.16 d20[0], r1 -; CHECK-NEXT:    vuzp.16 d22, d23 -; CHECK-NEXT:    vmov.u16 r1, d17[1] -; CHECK-NEXT:    vmov.16 d20[1], r0 -; CHECK-NEXT:    vuzp.16 d22, d18 -; CHECK-NEXT:    vmov.16 d20[2], r1 -; CHECK-NEXT:    vmov.u16 r0, d19[1] -; CHECK-NEXT:    vext.16 d21, d16, d18, #3 -; CHECK-NEXT:    vmov.16 d20[3], r0 -; CHECK-NEXT:    vmov r0, r1, d20 -; CHECK-NEXT:    vmov r2, r3, d21 -; CHECK-NEXT:    mov pc, lr +; CHECK-NEXT:	vld1.64	{d16, d17}, [r0] +; CHECK-NEXT:	vorr	d22, d16, d16 +; CHECK-NEXT:	vmov.u16	r0, d16[0] +; CHECK-NEXT:	vorr	d23, d16, d16 +; CHECK-NEXT:	vmov.u16	r2, d17[3] +; CHECK-NEXT:	vmov.u16	r3, d17[1] +; CHECK-NEXT:	vld1.64	{d18, d19}, [r1] +; CHECK-NEXT:	vmov.u16	r1, d19[1] +; CHECK-NEXT:	vuzp.16	d22, d23 +; CHECK-NEXT:	vuzp.16	d22, d18 +; CHECK-NEXT:	vmov.16	d20[0], r0 +; CHECK-NEXT:	vmov.16	d20[1], r2 +; CHECK-NEXT:	vmov.16	d20[2], r3 +; CHECK-NEXT:	vmov.16	d20[3], r1 +; CHECK-NEXT:	vext.16	d21, d16, d18, #3 +; CHECK-NEXT:	vmov	r0, r1, d20 +; CHECK-NEXT:	vmov	r2, r3, d21 +; CHECK-NEXT:	mov	pc, lr         %tmp1 = load <8 x i16>, <8 x i16>* %A         %tmp2 = load <8 x i16>, <8 x i16>* %B         %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9> @@ -289,10 +289,10 @@ define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>  ; CHECK-LABEL: test_elem_mismatch:  ; CHECK:       @ BB#0:  ; CHECK-NEXT:    vld1.64 {d16, d17}, [r0:128] -; CHECK-NEXT:    vmov.32 r2, d16[0] -; CHECK-NEXT:    vmov.32 r0, d17[0] -; CHECK-NEXT:    vmov.16 d16[0], r2 -; CHECK-NEXT:    vmov.16 d16[1], r0 +; CHECK-NEXT:    vmov.32 r0, d16[0] +; CHECK-NEXT:    vmov.32 r2, d17[0] +; CHECK-NEXT:    vmov.16 d16[0], r0 +; CHECK-NEXT:    vmov.16 d16[1], r2  ; CHECK-NEXT:    vstr d16, [r1]  ; CHECK-NEXT:    mov pc, lr    %tmp0 = load <2 x i64>, <2 x i64>* %src, align 16 diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll index 03c0354aa1df..8fa5113d8a31 100644 --- a/test/CodeGen/ARM/vfp.ll +++ b/test/CodeGen/ARM/vfp.ll @@ -40,8 +40,8 @@ define void @test_add(float* %P, double* %D) {  define void @test_ext_round(float* %P, double* %D) {  ;CHECK-LABEL: test_ext_round:  	%a = load float, float* %P		; <float> [#uses=1] -;CHECK: vcvt.f64.f32 -;CHECK: vcvt.f32.f64 +;CHECK-DAG: vcvt.f64.f32 +;CHECK-DAG: vcvt.f32.f64  	%b = fpext float %a to double		; <double> [#uses=1]  	%A = load double, double* %D		; <double> [#uses=1]  	%B = fptrunc double %A to float		; <float> [#uses=1] diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index bdb384769741..c50e0beea4d1 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -78,7 +78,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {  ;Check for a post-increment updating load.  define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {  ;CHECK-LABEL: vld1Qi8_update: -;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]! +;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+|lr}}:64]!  	%A = load i8*, i8** %ptr  	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %A, i32 8)  	%tmp2 = getelementptr i8, i8* %A, i32 16 diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index 1ca16587bd91..6ef37c1b6678 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -14,7 +14,7 @@  define <8 x i8> @vld2i8(i8* %A) nounwind {  ;CHECK-LABEL: vld2i8:  ;Check the alignment value.  Max for this instruction is 128 bits: -;CHECK: vld2.8 {d16, d17}, [r0:64] +;CHECK: vld2.8 {d16, d17}, [{{r[0-9]+|lr}}:64]  	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8.p0i8(i8* %A, i32 8)          %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0          %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 @@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {  define <4 x i16> @vld2i16(i16* %A) nounwind {  ;CHECK-LABEL: vld2i16:  ;Check the alignment value.  Max for this instruction is 128 bits: -;CHECK: vld2.16 {d16, d17}, [r0:128] +;CHECK: vld2.16 {d16, d17}, [{{r[0-9]+|lr}}:128]  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16.p0i8(i8* %tmp0, i32 32)          %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 @@ -59,7 +59,7 @@ define <2 x float> @vld2f(float* %A) nounwind {  ;Check for a post-increment updating load.   define <2 x float> @vld2f_update(float** %ptr) nounwind {  ;CHECK-LABEL: vld2f_update: -;CHECK: vld2.32 {d16, d17}, [r1]! +;CHECK: vld2.32 {d16, d17}, [{{r[0-9]+|lr}}]!  	%A = load float*, float** %ptr  	%tmp0 = bitcast float* %A to i8*  	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32.p0i8(i8* %tmp0, i32 1) @@ -74,7 +74,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {  define <1 x i64> @vld2i64(i64* %A) nounwind {  ;CHECK-LABEL: vld2i64:  ;Check the alignment value.  Max for this instruction is 128 bits: -;CHECK: vld1.64 {d16, d17}, [r0:128] +;CHECK: vld1.64 {d16, d17}, [{{r[0-9]+|lr}}:128]  	%tmp0 = bitcast i64* %A to i8*  	%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64.p0i8(i8* %tmp0, i32 32)          %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 @@ -86,7 +86,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {  define <16 x i8> @vld2Qi8(i8* %A) nounwind {  ;CHECK-LABEL: vld2Qi8:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64] +;CHECK: vld2.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:64]  	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 8)          %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0          %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 @@ -97,7 +97,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {  ;Check for a post-increment updating load with register increment.  define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {  ;CHECK-LABEL: vld2Qi8_update: -;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1 +;CHECK: vld2.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128], r1  	%A = load i8*, i8** %ptr  	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 16)          %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 @@ -111,7 +111,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {  define <8 x i16> @vld2Qi16(i16* %A) nounwind {  ;CHECK-LABEL: vld2Qi16:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128] +;CHECK: vld2.16 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128]  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16.p0i8(i8* %tmp0, i32 16)          %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 @@ -123,7 +123,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {  define <4 x i32> @vld2Qi32(i32* %A) nounwind {  ;CHECK-LABEL: vld2Qi32:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256] +;CHECK: vld2.32 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]  	%tmp0 = bitcast i32* %A to i8*  	%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %tmp0, i32 64)          %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index c3e8ee8691fd..0eaad0f90035 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -15,7 +15,7 @@  define <8 x i8> @vld3i8(i8* %A) nounwind {  ;CHECK-LABEL: vld3i8:  ;Check the alignment value.  Max for this instruction is 64 bits: -;CHECK: vld3.8 {d16, d17, d18}, [r0:64] +;CHECK: vld3.8 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]  	%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A, i32 32)          %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0          %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 @@ -37,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {  ;Check for a post-increment updating load with register increment.  define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {  ;CHECK-LABEL: vld3i16_update: -;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}} +;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+|lr}}], {{r[0-9]+|lr}}  	%A = load i16*, i16** %ptr  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1) @@ -74,7 +74,7 @@ define <2 x float> @vld3f(float* %A) nounwind {  define <1 x i64> @vld3i64(i64* %A) nounwind {  ;CHECK-LABEL: vld3i64:  ;Check the alignment value.  Max for this instruction is 64 bits: -;CHECK: vld1.64 {d16, d17, d18}, [r0:64] +;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]  	%tmp0 = bitcast i64* %A to i8*  	%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)          %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 @@ -85,7 +85,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {  define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {  ;CHECK-LABEL: vld3i64_update: -;CHECK: vld1.64	{d16, d17, d18}, [r1:64]! +;CHECK: vld1.64	{d16, d17, d18}, [{{r[0-9]+|lr}}:64]!          %tmp0 = bitcast i64* %A to i8*          %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)          %tmp5 = getelementptr i64, i64* %A, i32 3 @@ -99,8 +99,8 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {  define <16 x i8> @vld3Qi8(i8* %A) nounwind {  ;CHECK-LABEL: vld3Qi8:  ;Check the alignment value.  Max for this instruction is 64 bits: -;CHECK: vld3.8 {d16, d18, d20}, [r0:64]! -;CHECK: vld3.8 {d17, d19, d21}, [r0:64] +;CHECK: vld3.8 {d16, d18, d20}, [{{r[0-9]+|lr}}:64]! +;CHECK: vld3.8 {d17, d19, d21}, [{{r[0-9]+|lr}}:64]  	%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8* %A, i32 32)          %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0          %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 @@ -135,8 +135,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {  ;Check for a post-increment updating load.   define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {  ;CHECK-LABEL: vld3Qi32_update: -;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]! -;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]! +;CHECK: vld3.32 {d16, d18, d20}, {{\[}}[[R:r[0-9]+|lr]]]! +;CHECK: vld3.32 {d17, d19, d21}, {{\[}}[[R]]]!  	%A = load i32*, i32** %ptr  	%tmp0 = bitcast i32* %A to i8*  	%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1) diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index 10570039a9d2..5663e6d41f02 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -14,7 +14,7 @@  define <8 x i8> @vld4i8(i8* %A) nounwind {  ;CHECK-LABEL: vld4i8:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64] +;CHECK: vld4.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:64]  	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)          %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0          %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 @@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {  ;Check for a post-increment updating load with register increment.  define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {  ;CHECK-LABEL: vld4i8_update: -;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1 +;CHECK: vld4.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128], r1  	%A = load i8*, i8** %ptr  	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 16)  	%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 @@ -39,7 +39,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {  define <4 x i16> @vld4i16(i16* %A) nounwind {  ;CHECK-LABEL: vld4i16:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128] +;CHECK: vld4.16 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128]  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16.p0i8(i8* %tmp0, i32 16)          %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 @@ -51,7 +51,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {  define <2 x i32> @vld4i32(i32* %A) nounwind {  ;CHECK-LABEL: vld4i32:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256] +;CHECK: vld4.32 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]  	%tmp0 = bitcast i32* %A to i8*  	%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* %tmp0, i32 32)          %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 @@ -74,7 +74,7 @@ define <2 x float> @vld4f(float* %A) nounwind {  define <1 x i64> @vld4i64(i64* %A) nounwind {  ;CHECK-LABEL: vld4i64:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256] +;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]  	%tmp0 = bitcast i64* %A to i8*  	%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)          %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 @@ -85,7 +85,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {  define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {  ;CHECK-LABEL: vld4i64_update: -;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]! +;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]!          %tmp0 = bitcast i64* %A to i8*          %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)          %tmp5 = getelementptr i64, i64* %A, i32 4 @@ -99,8 +99,8 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {  define <16 x i8> @vld4Qi8(i8* %A) nounwind {  ;CHECK-LABEL: vld4Qi8:  ;Check the alignment value.  Max for this instruction is 256 bits: -;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]! -;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256] +;CHECK: vld4.8 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}:256]! +;CHECK: vld4.8 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}:256]  	%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8.p0i8(i8* %A, i32 64)          %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0          %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 @@ -111,8 +111,8 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {  define <8 x i16> @vld4Qi16(i16* %A) nounwind {  ;CHECK-LABEL: vld4Qi16:  ;Check for no alignment specifier. -;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! -;CHECK: vld4.16 {d17, d19, d21, d23}, [r0] +;CHECK: vld4.16 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}]! +;CHECK: vld4.16 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}]  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 1)          %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 @@ -124,8 +124,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {  ;Check for a post-increment updating load.   define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {  ;CHECK-LABEL: vld4Qi16_update: -;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]! -;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]! +;CHECK: vld4.16 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}:64]! +;CHECK: vld4.16 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}:64]!  	%A = load i16*, i16** %ptr  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 8) diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index 71ca0f791524..72f9434fd10a 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -3,7 +3,7 @@  define <8 x i8> @vld1dupi8(i8* %A) nounwind {  ;CHECK-LABEL: vld1dupi8:  ;Check the (default) alignment value. -;CHECK: vld1.8 {d16[]}, [r0] +;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]  	%tmp1 = load i8, i8* %A, align 8  	%tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0  	%tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer @@ -13,7 +13,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {  define <8 x i8> @vld1dupi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {  entry:  ;CHECK-LABEL: vld1dupi8_preinc: -;CHECK: vld1.8 {d16[]}, [r1] +;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]    %0 = load i8*, i8** %a, align 4    %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b    %1 = load i8, i8* %add.ptr, align 1 @@ -26,7 +26,7 @@ entry:  define <8 x i8> @vld1dupi8_postinc_fixed(i8** noalias nocapture %a) nounwind {  entry:  ;CHECK-LABEL: vld1dupi8_postinc_fixed: -;CHECK: vld1.8 {d16[]}, [r1]! +;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]!    %0 = load i8*, i8** %a, align 4    %1 = load i8, i8* %0, align 1    %2 = insertelement <8 x i8> undef, i8 %1, i32 0 @@ -39,7 +39,7 @@ entry:  define <8 x i8> @vld1dupi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {  entry:  ;CHECK-LABEL: vld1dupi8_postinc_register: -;CHECK: vld1.8 {d16[]}, [r2], r1 +;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}], r1    %0 = load i8*, i8** %a, align 4    %1 = load i8, i8* %0, align 1    %2 = insertelement <8 x i8> undef, i8 %1, i32 0 @@ -52,7 +52,7 @@ entry:  define <16 x i8> @vld1dupqi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {  entry:  ;CHECK-LABEL: vld1dupqi8_preinc: -;CHECK: vld1.8 {d16[], d17[]}, [r1] +;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]    %0 = load i8*, i8** %a, align 4    %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b    %1 = load i8, i8* %add.ptr, align 1 @@ -65,7 +65,7 @@ entry:  define <16 x i8> @vld1dupqi8_postinc_fixed(i8** noalias nocapture %a) nounwind {  entry:  ;CHECK-LABEL: vld1dupqi8_postinc_fixed: -;CHECK: vld1.8 {d16[], d17[]}, [r1]! +;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]!    %0 = load i8*, i8** %a, align 4    %1 = load i8, i8* %0, align 1    %2 = insertelement <16 x i8> undef, i8 %1, i32 0 @@ -78,7 +78,7 @@ entry:  define <16 x i8> @vld1dupqi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {  entry:  ;CHECK-LABEL: vld1dupqi8_postinc_register: -;CHECK: vld1.8 {d16[], d17[]}, [r2], r1 +;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}], r1    %0 = load i8*, i8** %a, align 4    %1 = load i8, i8* %0, align 1    %2 = insertelement <16 x i8> undef, i8 %1, i32 0 @@ -91,7 +91,7 @@ entry:  define <4 x i16> @vld1dupi16(i16* %A) nounwind {  ;CHECK-LABEL: vld1dupi16:  ;Check the alignment value.  Max for this instruction is 16 bits: -;CHECK: vld1.16 {d16[]}, [r0:16] +;CHECK: vld1.16 {d16[]}, [{{r[0-9]+|lr}}:16]  	%tmp1 = load i16, i16* %A, align 8  	%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0  	%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer @@ -100,7 +100,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {  define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {  ;CHECK-LABEL: vld1dupi16_misaligned: -;CHECK: vld1.16 {d16[]}, [r0] +;CHECK: vld1.16 {d16[]}, [{{r[0-9]+|lr}}]  	%tmp1 = load i16, i16* %A, align 1  	%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0  	%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer @@ -110,7 +110,7 @@ define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {  ; This sort of looks like a vld1dup, but there's an extension in the way.  define <4 x i16> @load_i16_dup_zext(i8* %A) nounwind {  ;CHECK-LABEL: load_i16_dup_zext: -;CHECK: ldrb    r0, [r0] +;CHECK: ldrb    r0, [{{r[0-9]+|lr}}]  ;CHECK-NEXT: vdup.16 d16, r0          %tmp1 = load i8, i8* %A, align 1          %tmp2 = zext i8 %tmp1 to i16 @@ -122,7 +122,7 @@ define <4 x i16> @load_i16_dup_zext(i8* %A) nounwind {  ; This sort of looks like a vld1dup, but there's an extension in the way.  define <4 x i16> @load_i16_dup_sext(i8* %A) nounwind {  ;CHECK-LABEL: load_i16_dup_sext: -;CHECK: ldrsb    r0, [r0] +;CHECK: ldrsb    r0, [{{r[0-9]+|lr}}]  ;CHECK-NEXT: vdup.16 d16, r0          %tmp1 = load i8, i8* %A, align 1          %tmp2 = sext i8 %tmp1 to i16 @@ -134,7 +134,7 @@ define <4 x i16> @load_i16_dup_sext(i8* %A) nounwind {  ; This sort of looks like a vld1dup, but there's an extension in the way.  define <8 x i16> @load_i16_dupq_zext(i8* %A) nounwind {  ;CHECK-LABEL: load_i16_dupq_zext: -;CHECK: ldrb    r0, [r0] +;CHECK: ldrb    r0, [{{r[0-9]+|lr}}]  ;CHECK-NEXT: vdup.16 q8, r0          %tmp1 = load i8, i8* %A, align 1          %tmp2 = zext i8 %tmp1 to i16 @@ -146,7 +146,7 @@ define <8 x i16> @load_i16_dupq_zext(i8* %A) nounwind {  define <2 x i32> @vld1dupi32(i32* %A) nounwind {  ;CHECK-LABEL: vld1dupi32:  ;Check the alignment value.  Max for this instruction is 32 bits: -;CHECK: vld1.32 {d16[]}, [r0:32] +;CHECK: vld1.32 {d16[]}, [{{r[0-9]+|lr}}:32]  	%tmp1 = load i32, i32* %A, align 8  	%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0  	%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer @@ -156,7 +156,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {  ; This sort of looks like a vld1dup, but there's an extension in the way.  define <4 x i32> @load_i32_dup_zext(i8* %A) nounwind {  ;CHECK-LABEL: load_i32_dup_zext: -;CHECK: ldrb    r0, [r0] +;CHECK: ldrb    r0, [{{r[0-9]+|lr}}]  ;CHECK-NEXT: vdup.32 q8, r0          %tmp1 = load i8, i8* %A, align 1          %tmp2 = zext i8 %tmp1 to i32 @@ -168,7 +168,7 @@ define <4 x i32> @load_i32_dup_zext(i8* %A) nounwind {  ; This sort of looks like a vld1dup, but there's an extension in the way.  define <4 x i32> @load_i32_dup_sext(i8* %A) nounwind {  ;CHECK-LABEL: load_i32_dup_sext: -;CHECK: ldrsb    r0, [r0] +;CHECK: ldrsb    r0, [{{r[0-9]+|lr}}]  ;CHECK-NEXT: vdup.32 q8, r0          %tmp1 = load i8, i8* %A, align 1          %tmp2 = sext i8 %tmp1 to i32 @@ -179,7 +179,7 @@ define <4 x i32> @load_i32_dup_sext(i8* %A) nounwind {  define <2 x float> @vld1dupf(float* %A) nounwind {  ;CHECK-LABEL: vld1dupf: -;CHECK: vld1.32 {d16[]}, [r0:32] +;CHECK: vld1.32 {d16[]}, [{{r[0-9]+|lr}}:32]  	%tmp0 = load float, float* %A          %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0          %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer @@ -189,7 +189,7 @@ define <2 x float> @vld1dupf(float* %A) nounwind {  define <16 x i8> @vld1dupQi8(i8* %A) nounwind {  ;CHECK-LABEL: vld1dupQi8:  ;Check the (default) alignment value. -;CHECK: vld1.8 {d16[], d17[]}, [r0] +;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]  	%tmp1 = load i8, i8* %A, align 8  	%tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0  	%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer @@ -198,7 +198,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {  define <4 x float> @vld1dupQf(float* %A) nounwind {  ;CHECK-LABEL: vld1dupQf: -;CHECK: vld1.32 {d16[], d17[]}, [r0:32] +;CHECK: vld1.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:32]          %tmp0 = load float, float* %A          %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0          %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer @@ -212,7 +212,7 @@ define <4 x float> @vld1dupQf(float* %A) nounwind {  define <8 x i8> @vld2dupi8(i8* %A) nounwind {  ;CHECK-LABEL: vld2dupi8:  ;Check the (default) alignment value. -;CHECK: vld2.8 {d16[], d17[]}, [r0] +;CHECK: vld2.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]  	%tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)  	%tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0  	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer @@ -283,7 +283,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {  ;CHECK-LABEL: vld2dupi16:  ;Check that a power-of-two alignment smaller than the total size of the memory  ;being loaded is ignored. -;CHECK: vld2.16 {d16[], d17[]}, [r0] +;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}]  	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)  	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0  	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer @@ -296,7 +296,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {  ;Check for a post-increment updating load.   define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {  ;CHECK-LABEL: vld2dupi16_update: -;CHECK: vld2.16 {d16[], d17[]}, [r1]! +;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}]!  	%A = load i16*, i16** %ptr          %A2 = bitcast i16* %A to i8*  	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) @@ -313,7 +313,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {  define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {  ;CHECK-LABEL: vld2dupi16_odd_update:  ;CHECK: mov [[INC:r[0-9]+]], #6 -;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]] +;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}], [[INC]]  	%A = load i16*, i16** %ptr          %A2 = bitcast i16* %A to i8*  	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) @@ -330,7 +330,7 @@ define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {  define <2 x i32> @vld2dupi32(i8* %A) nounwind {  ;CHECK-LABEL: vld2dupi32:  ;Check the alignment value.  Max for this instruction is 64 bits: -;CHECK: vld2.32 {d16[], d17[]}, [r0:64] +;CHECK: vld2.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:64]  	%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)  	%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0  	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer @@ -350,7 +350,7 @@ declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x  ;Check for a post-increment updating load with register increment.  define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {  ;CHECK-LABEL: vld3dupi8_update: -;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1 +;CHECK: vld3.8 {d16[], d17[], d18[]}, [{{r[0-9]+|lr}}], r1  	%A = load i8*, i8** %ptr  	%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)  	%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0 @@ -369,7 +369,7 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {  define <4 x i16> @vld3dupi16(i8* %A) nounwind {  ;CHECK-LABEL: vld3dupi16:  ;Check the (default) alignment value. VLD3 does not support alignment. -;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0] +;CHECK: vld3.16 {d16[], d17[], d18[]}, [{{r[0-9]+|lr}}]  	%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)  	%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0  	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer @@ -391,7 +391,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x  ;Check for a post-increment updating load.  define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {  ;CHECK-LABEL: vld4dupi16_update: -;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]! +;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [{{r[0-9]+|lr}}]!  	%A = load i16*, i16** %ptr          %A2 = bitcast i16* %A to i8*  	%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) @@ -415,7 +415,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {  ;CHECK-LABEL: vld4dupi32:  ;Check the alignment value.  An 8-byte alignment is allowed here even though  ;it is smaller than the total size of the memory being loaded. -;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64] +;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [{{r[0-9]+|lr}}:64]  	%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)  	%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0  	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 866641f3fbbd..f5c0f09ed440 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -308,7 +308,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {  ;Check for a post-increment updating load with register increment.  define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {  ;CHECK-LABEL: vld3laneQi16_update: -;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}} +;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+|lr}}], {{r[0-9]+}}  	%A = load i16*, i16** %ptr  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = load <8 x i16>, <8 x i16>* %B diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll index 3409d37a31f4..3fa93bb43f03 100644 --- a/test/CodeGen/ARM/vpadd.ll +++ b/test/CodeGen/ARM/vpadd.ll @@ -285,17 +285,17 @@ define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss  define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {  ; CHECK-LABEL: addCombineToVPADDL_s8:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    vld1.64 {d16, d17}, [r0] -; CHECK-NEXT:    vmov.i16 d18, #0x8 -; CHECK-NEXT:    vneg.s16 d18, d18 -; CHECK-NEXT:    vext.8 d19, d16, d16, #1 -; CHECK-NEXT:    vshl.i16 d16, d16, #8 -; CHECK-NEXT:    vshl.i16 d17, d19, #8 -; CHECK-NEXT:    vshl.s16 d16, d16, d18 -; CHECK-NEXT:    vshl.s16 d17, d17, d18 -; CHECK-NEXT:    vadd.i16 d16, d17, d16 -; CHECK-NEXT:    vstr d16, [r1] -; CHECK-NEXT:    mov pc, lr +; CHECK-NEXT:    vmov.i16	d16, #0x8 +; CHECK-NEXT:    vld1.64	{d18, d19}, [r0] +; CHECK-NEXT:    vext.8	d17, d18, d16, #1 +; CHECK-NEXT:    vneg.s16	d16, d16 +; CHECK-NEXT:    vshl.i16	d18, d18, #8 +; CHECK-NEXT:    vshl.i16	d17, d17, #8 +; CHECK-NEXT:    vshl.s16	d18, d18, d16 +; CHECK-NEXT:    vshl.s16	d16, d17, d16 +; CHECK-NEXT:    vadd.i16	d16, d16, d18 +; CHECK-NEXT:    vstr	d16, [r1] +; CHECK-NEXT:    mov	pc, lr    %tmp = load <16 x i8>, <16 x i8>* %cbcr    %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>    %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index 404129a7e6ad..e351a2ec2373 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -39,7 +39,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {  ;Check for a post-increment updating store.  define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {  ;CHECK-LABEL: vst1f_update: -;CHECK: vst1.32 {d16}, [r1]! +;CHECK: vst1.32 {d16}, [r{{[0-9]+}}]!  	%A = load float*, float** %ptr  	%tmp0 = bitcast float* %A to i8*  	%tmp1 = load <2 x float>, <2 x float>* %B diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index 188955102290..afa4321c91a0 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {  ;Check for a post-increment updating store with register increment.  define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {  ;CHECK-LABEL: vst4i8_update: -;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2 +;CHECK: vst4.8 {d16, d17, d18, d19}, [r{{[0-9]+}}:128], r2  	%A = load i8*, i8** %ptr  	%tmp1 = load <8 x i8>, <8 x i8>* %B  	call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16) @@ -62,7 +62,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {  define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {  ;CHECK-LABEL: vst4i64_update: -;CHECK: vst1.64	{d16, d17, d18, d19}, [r1]! +;CHECK: vst1.64	{d16, d17, d18, d19}, [r{{[0-9]+}}]!          %A = load i64*, i64** %ptr          %tmp0 = bitcast i64* %A to i8*          %tmp1 = load <1 x i64>, <1 x i64>* %B @@ -116,8 +116,8 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {  ;Check for a post-increment updating store.  define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {  ;CHECK-LABEL: vst4Qf_update: -;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]! -;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]! +  ;CHECK: vst4.32 {d16, d18, d20, d22}, [r[[REG:[0-9]+]]]! +;CHECK: vst4.32 {d17, d19, d21, d23}, [r[[REG]]]!  	%A = load float*, float** %ptr  	%tmp0 = bitcast float* %A to i8*  	%tmp1 = load <4 x float>, <4 x float>* %B diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index 7e130ea01b64..49af0be92316 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -127,7 +127,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {  ;Check for a post-increment updating store with register increment.  define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {  ;CHECK-LABEL: vst2lanei16_update: -;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2 +;CHECK: vst2.16 {d16[1], d17[1]}, [r{{[0-9]+}}], r{{[0-9]+}}  	%A = load i16*, i16** %ptr  	%tmp0 = bitcast i16* %A to i8*  	%tmp1 = load <4 x i16>, <4 x i16>* %B @@ -251,7 +251,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {  ;Check for a post-increment updating store.  define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {  ;CHECK-LABEL: vst3laneQi32_update: -;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]! +;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r{{[0-9]+}}]!  	%A = load i32*, i32** %ptr  	%tmp0 = bitcast i32* %A to i8*  	%tmp1 = load <4 x i32>, <4 x i32>* %B @@ -292,7 +292,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {  ;Check for a post-increment updating store.  define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {  ;CHECK-LABEL: vst4lanei8_update: -;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]! +;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r{{[0-9]+}}:32]!  	%A = load i8*, i8** %ptr  	%tmp1 = load <8 x i8>, <8 x i8>* %B  	call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 0a5235df319f..24090cfd6c65 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -324,26 +324,23 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8  ; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8.  ; CHECK-LABEL: cmpsel_trunc:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    .save {r4, r5, r11, lr} -; CHECK-NEXT:    push {r4, r5, r11, lr} -; CHECK-NEXT:    add r4, sp, #64 -; CHECK-NEXT:    add r5, sp, #32 -; CHECK-NEXT:    add r12, sp, #48 -; CHECK-NEXT:    add lr, sp, #16 -; CHECK-NEXT:    vld1.64 {d16, d17}, [r5] -; CHECK-NEXT:    vld1.64 {d18, d19}, [r4] -; CHECK-NEXT:    vld1.64 {d20, d21}, [lr] -; CHECK-NEXT:    vld1.64 {d22, d23}, [r12] -; CHECK-NEXT:    vcgt.u32 q8, q9, q8 -; CHECK-NEXT:    vcgt.u32 q9, q11, q10 -; CHECK-NEXT:    vmovn.i32 d17, q8 -; CHECK-NEXT:    vmovn.i32 d16, q9 -; CHECK-NEXT:    vmov d18, r2, r3 -; CHECK-NEXT:    vmov d19, r0, r1 -; CHECK-NEXT:    vmovn.i16 d16, q8 -; CHECK-NEXT:    vbsl d16, d19, d18 -; CHECK-NEXT:    vmov r0, r1, d16 -; CHECK-NEXT:    pop {r4, r5, r11, lr} +; CHECK-NEXT:	add	r12, sp, #16 +; CHECK-NEXT: 	vld1.64	{d16, d17}, [r12] +; CHECK-NEXT:	mov	r12, sp +; CHECK-NEXT:	vld1.64	{d18, d19}, [r12] +; CHECK-NEXT:	add	r12, sp, #48 +; CHECK-NEXT:	vld1.64	{d20, d21}, [r12] +; CHECK-NEXT:	add	r12, sp, #32 +; CHECK-NEXT:	vcgt.u32	q8, q10, q8 +; CHECK-NEXT:	vld1.64	{d20, d21}, [r12] +; CHECK-NEXT:	vcgt.u32	q9, q10, q9 +; CHECK-NEXT:	vmov	d20, r2, r3 +; CHECK-NEXT:	vmovn.i32	d17, q8 +; CHECK-NEXT:	vmovn.i32	d16, q9 +; CHECK-NEXT:	vmov	d18, r0, r1 +; CHECK-NEXT:	vmovn.i16	d16, q8 +; CHECK-NEXT:	vbsl	d16, d18, d20 +; CHECK-NEXT:	vmov	r0, r1, d16  ; CHECK-NEXT:    mov pc, lr    %c = icmp ult <8 x i32> %cmp0, %cmp1    %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1 @@ -356,28 +353,28 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8  define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,  ; CHECK-LABEL: vuzp_trunc_and_shuffle:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    .save {r4, lr} -; CHECK-NEXT:    push {r4, lr} -; CHECK-NEXT:    ldr r12, [sp, #40] -; CHECK-NEXT:    add lr, sp, #24 -; CHECK-NEXT:    add r4, sp, #8 -; CHECK-NEXT:    vld1.64 {d16, d17}, [r4] -; CHECK-NEXT:    vld1.64 {d18, d19}, [lr] -; CHECK-NEXT:    vld1.32 {d20[0]}, [r12:32] -; CHECK-NEXT:    vcgt.u32 q8, q9, q8 -; CHECK-NEXT:    vmovn.i32 d16, q8 -; CHECK-NEXT:    vmov.i8 d17, #0x7 -; CHECK-NEXT:    vneg.s8 d17, d17 -; CHECK-NEXT:    vmovl.u8 q9, d20 -; CHECK-NEXT:    vuzp.8 d16, d18 -; CHECK-NEXT:    vshl.i8 d16, d16, #7 -; CHECK-NEXT:    vmov d18, r2, r3 -; CHECK-NEXT:    vmov d19, r0, r1 -; CHECK-NEXT:    vshl.s8 d16, d16, d17 -; CHECK-NEXT:    vbsl d16, d19, d18 -; CHECK-NEXT:    vmov r0, r1, d16 -; CHECK-NEXT:    pop {r4, lr} -; CHECK-NEXT:    mov pc, lr +; CHECK-NEXT:	.save	{r11, lr} +; CHECK-NEXT:	push	{r11, lr} +; CHECK-NEXT:	add	r12, sp, #8 +; CHECK-NEXT:	add	lr, sp, #24 +; CHECK-NEXT:	vld1.64	{d16, d17}, [r12] +; CHECK-NEXT:	ldr	r12, [sp, #40] +; CHECK-NEXT:	vld1.64	{d18, d19}, [lr] +; CHECK-NEXT:	vcgt.u32	q8, q9, q8 +; CHECK-NEXT:	vld1.32	{d18[0]}, [r12:32] +; CHECK-NEXT:	vmov.i8	d19, #0x7 +; CHECK-NEXT:	vmovl.u8	q10, d18 +; CHECK-NEXT:	vmovn.i32	d16, q8 +; CHECK-NEXT:	vneg.s8	d17, d19 +; CHECK-NEXT:	vmov	d18, r2, r3 +; CHECK-NEXT:	vuzp.8	d16, d20 +; CHECK-NEXT:	vshl.i8	d16, d16, #7 +; CHECK-NEXT:	vshl.s8	d16, d16, d17 +; CHECK-NEXT:	vmov	d17, r0, r1 +; CHECK-NEXT:	vbsl	d16, d17, d18 +; CHECK-NEXT:	vmov	r0, r1, d16 +; CHECK-NEXT:	pop	{r11, lr} +; CHECK-NEXT:	mov	pc, lr                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {    %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4    %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1> @@ -392,25 +389,22 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,  define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,  ; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    .save {r11, lr} -; CHECK-NEXT:    push {r11, lr} -; CHECK-NEXT:    add r12, sp, #24 -; CHECK-NEXT:    add lr, sp, #8 -; CHECK-NEXT:    vld1.64 {d16, d17}, [lr] -; CHECK-NEXT:    vld1.64 {d18, d19}, [r12] -; CHECK-NEXT:    vcgt.u32 q8, q9, q8 -; CHECK-NEXT:    vmov d19, r0, r1 -; CHECK-NEXT:    vmovn.i32 d16, q8 -; CHECK-NEXT:    vmov.i8 d17, #0x7 -; CHECK-NEXT:    vuzp.8 d16, d18 -; CHECK-NEXT:    vneg.s8 d17, d17 -; CHECK-NEXT:    vshl.i8 d16, d16, #7 -; CHECK-NEXT:    vmov d18, r2, r3 -; CHECK-NEXT:    vshl.s8 d16, d16, d17 -; CHECK-NEXT:    vbsl d16, d19, d18 -; CHECK-NEXT:    vmov r0, r1, d16 -; CHECK-NEXT:    pop {r11, lr} -; CHECK-NEXT:    mov pc, lr +; CHECK-NEXT:	mov	r12, sp +; CHECK-NEXT:	vld1.64	{d16, d17}, [r12] +; CHECK-NEXT:	add	r12, sp, #16 +; CHECK-NEXT:	vld1.64	{d18, d19}, [r12] +; CHECK-NEXT:	vcgt.u32	q8, q9, q8 +; CHECK-NEXT:	vmov.i8	d18, #0x7 +; CHECK-NEXT:	vmovn.i32	d16, q8 +; CHECK-NEXT:	vuzp.8	d16, d17 +; CHECK-NEXT:	vneg.s8	d17, d18 +; CHECK-NEXT:	vshl.i8	d16, d16, #7 +; CHECK-NEXT:	vmov	d18, r2, r3 +; CHECK-NEXT:	vshl.s8	d16, d16, d17 +; CHECK-NEXT:	vmov	d17, r0, r1 +; CHECK-NEXT:	vbsl	d16, d17, d18 +; CHECK-NEXT:	vmov	r0, r1, d16 +; CHECK-NEXT:	mov	pc, lr                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {    %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4    %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1> @@ -423,26 +417,23 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1  define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,  ; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    .save {r11, lr} -; CHECK-NEXT:    push {r11, lr} -; CHECK-NEXT:    add r12, sp, #24 -; CHECK-NEXT:    add lr, sp, #8 -; CHECK-NEXT:    vldr d20, .LCPI22_0 -; CHECK-NEXT:    vld1.64 {d16, d17}, [lr] -; CHECK-NEXT:    vld1.64 {d18, d19}, [r12] -; CHECK-NEXT:    vcgt.u32 q8, q9, q8 -; CHECK-NEXT:    vmov d18, r2, r3 -; CHECK-NEXT:    vmov d19, r0, r1 -; CHECK-NEXT:    vmovn.i32 d16, q8 -; CHECK-NEXT:    vmov.i8 d17, #0x7 -; CHECK-NEXT:    vtbl.8 d16, {d16}, d20 -; CHECK-NEXT:    vneg.s8 d17, d17 -; CHECK-NEXT:    vshl.i8 d16, d16, #7 -; CHECK-NEXT:    vshl.s8 d16, d16, d17 -; CHECK-NEXT:    vbsl d16, d19, d18 -; CHECK-NEXT:    vmov r0, r1, d16 -; CHECK-NEXT:    pop {r11, lr} -; CHECK-NEXT:    mov pc, lr +; CHECK-NEXT:	mov	r12, sp +; CHECK-NEXT:	vld1.64	{d16, d17}, [r12] +; CHECK-NEXT:	add	r12, sp, #16 +; CHECK-NEXT:	vld1.64	{d18, d19}, [r12] +; CHECK-NEXT:	vcgt.u32	q8, q9, q8 +; CHECK-NEXT:	vldr	d18, .LCPI22_0 +; CHECK-NEXT:	vmov.i8	d19, #0x7 +; CHECK-NEXT:	vmovn.i32	d16, q8 +; CHECK-NEXT:	vtbl.8	d16, {d16}, d18 +; CHECK-NEXT:	vneg.s8	d17, d19 +; CHECK-NEXT:	vmov	d18, r2, r3 +; CHECK-NEXT:	vshl.i8	d16, d16, #7 +; CHECK-NEXT:	vshl.s8	d16, d16, d17 +; CHECK-NEXT:	vmov	d17, r0, r1 +; CHECK-NEXT:	vbsl	d16, d17, d18 +; CHECK-NEXT:	vmov	r0, r1, d16 +; CHECK-NEXT:	mov	pc, lr  ; CHECK-NEXT:    .p2align 3  ; CHECK-NEXT:  @ BB#1:  ; CHECK-NEXT:  .LCPI22_0: @@ -468,65 +459,63 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,  define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,  ; CHECK-LABEL: vuzp_wide_type:  ; CHECK:       @ BB#0: -; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT:    .setfp r11, sp, #16 -; CHECK-NEXT:    add r11, sp, #16 -; CHECK-NEXT:    .pad #8 -; CHECK-NEXT:    sub sp, sp, #8 -; CHECK-NEXT:    bic sp, sp, #15 -; CHECK-NEXT:    add r5, r11, #52 -; CHECK-NEXT:    add r7, r11, #32 -; CHECK-NEXT:    add r4, r11, #44 -; CHECK-NEXT:    add r6, r11, #24 -; CHECK-NEXT:    add r12, r11, #60 -; CHECK-NEXT:    add lr, r11, #40 -; CHECK-NEXT:    vld1.32 {d17[0]}, [r7:32] -; CHECK-NEXT:    vld1.32 {d19[0]}, [r5:32] -; CHECK-NEXT:    vld1.32 {d22[0]}, [r12:32] -; CHECK-NEXT:    ldr r12, [r11, #64] -; CHECK-NEXT:    vld1.32 {d20[0]}, [lr:32] -; CHECK-NEXT:    add r7, r11, #48 -; CHECK-NEXT:    add r5, r11, #28 -; CHECK-NEXT:    vld1.32 {d16[0]}, [r6:32] -; CHECK-NEXT:    vld1.32 {d18[0]}, [r4:32] -; CHECK-NEXT:    add r6, r11, #56 -; CHECK-NEXT:    add r4, r11, #36 -; CHECK-NEXT:    vcgt.u32 q10, q11, q10 -; CHECK-NEXT:    vld1.32 {d19[1]}, [r6:32] -; CHECK-NEXT:    vld1.32 {d17[1]}, [r4:32] -; CHECK-NEXT:    add r6, r12, #4 -; CHECK-NEXT:    vld1.32 {d18[1]}, [r7:32] -; CHECK-NEXT:    vld1.32 {d16[1]}, [r5:32] -; CHECK-NEXT:    ldr r7, [r12] -; CHECK-NEXT:    vcgt.u32 q8, q9, q8 -; CHECK-NEXT:    vmovn.i32 d18, q10 -; CHECK-NEXT:    vmov.32 d21[0], r7 -; CHECK-NEXT:    vmovn.i32 d16, q8 -; CHECK-NEXT:    vmov.u8 r7, d21[3] -; CHECK-NEXT:    vmov.i8 d17, #0x7 -; CHECK-NEXT:    vuzp.8 d16, d18 -; CHECK-NEXT:    vmov.8 d23[0], r7 -; CHECK-NEXT:    vneg.s8 d17, d17 -; CHECK-NEXT:    add r7, r11, #8 -; CHECK-NEXT:    vldr d18, .LCPI23_0 -; CHECK-NEXT:    vld1.8 {d23[1]}, [r6] -; CHECK-NEXT:    vshl.i8 d16, d16, #7 -; CHECK-NEXT:    vshl.s8 d20, d16, d17 -; CHECK-NEXT:    vmov.i8 q8, #0x7 -; CHECK-NEXT:    vneg.s8 q8, q8 -; CHECK-NEXT:    vtbl.8 d22, {d20, d21}, d18 -; CHECK-NEXT:    vld1.64 {d18, d19}, [r7] -; CHECK-NEXT:    vshl.i8 q10, q11, #7 -; CHECK-NEXT:    vmov d23, r2, r3 -; CHECK-NEXT:    vmov d22, r0, r1 -; CHECK-NEXT:    vshl.s8 q8, q10, q8 -; CHECK-NEXT:    vbsl q8, q11, q9 -; CHECK-NEXT:    vmov r0, r1, d16 -; CHECK-NEXT:    vmov r2, r3, d17 -; CHECK-NEXT:    sub sp, r11, #16 -; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT:    mov pc, lr +; CHECK-NEXT:	.save	{r4, r10, r11, lr} +; CHECK-NEXT:	push	{r4, r10, r11, lr} +; CHECK-NEXT:	.setfp	r11, sp, #8 +; CHECK-NEXT:	add	r11, sp, #8 +; CHECK-NEXT:	bic	sp, sp, #15 +; CHECK-NEXT:	add	r12, r11, #32 +; CHECK-NEXT:	add	lr, r11, #60 +; CHECK-NEXT:	vld1.32	{d17[0]}, [r12:32] +; CHECK-NEXT:	add	r12, r11, #24 +; CHECK-NEXT:	vld1.32	{d22[0]}, [lr:32] +; CHECK-NEXT:	add	lr, r11, #36 +; CHECK-NEXT:	vld1.32	{d16[0]}, [r12:32] +; CHECK-NEXT:	add	r12, r11, #52 +; CHECK-NEXT:	vld1.32	{d19[0]}, [r12:32] +; CHECK-NEXT:	add	r12, r11, #44 +; CHECK-NEXT:	vld1.32	{d17[1]}, [lr:32] +; CHECK-NEXT:	vld1.32	{d18[0]}, [r12:32] +; CHECK-NEXT:	add	r12, r11, #40 +; CHECK-NEXT:	vld1.32	{d20[0]}, [r12:32] +; CHECK-NEXT:	ldr	r12, [r11, #64] +; CHECK-NEXT:	vcgt.u32	q10, q11, q10 +; CHECK-NEXT:	ldr	r4, [r12] +; CHECK-NEXT:	vmov.32	d25[0], r4 +; CHECK-NEXT:	add	r4, r11, #28 +; CHECK-NEXT:	vld1.32	{d16[1]}, [r4:32] +; CHECK-NEXT:	add	r4, r11, #56 +; CHECK-NEXT:	vld1.32	{d19[1]}, [r4:32] +; CHECK-NEXT:	add	r4, r11, #48 +; CHECK-NEXT:	vmov.u8	lr, d25[3] +; CHECK-NEXT:	vld1.32	{d18[1]}, [r4:32] +; CHECK-NEXT:	add	r4, r12, #4 +; CHECK-NEXT:	vcgt.u32	q8, q9, q8 +; CHECK-NEXT:	vmovn.i32	d19, q10 +; CHECK-NEXT:	vldr	d20, .LCPI23_0 +; CHECK-NEXT:	vmov.i8	d18, #0x7 +; CHECK-NEXT:	vmovn.i32	d16, q8 +; CHECK-NEXT:	vneg.s8	d17, d18 +; CHECK-NEXT:	vuzp.8	d16, d19 +; CHECK-NEXT:	vmov.i8	q9, #0x7 +; CHECK-NEXT:	vshl.i8	d16, d16, #7 +; CHECK-NEXT:	vneg.s8	q9, q9 +; CHECK-NEXT:	vshl.s8	d24, d16, d17 +; CHECK-NEXT:	vmov.8	d17[0], lr +; CHECK-NEXT:	vtbl.8	d16, {d24, d25}, d20 +; CHECK-NEXT:	vld1.8	{d17[1]}, [r4] +; CHECK-NEXT:	add	r4, r11, #8 +; CHECK-NEXT:	vshl.i8	q8, q8, #7 +; CHECK-NEXT:	vld1.64	{d20, d21}, [r4] +; CHECK-NEXT:	vshl.s8	q8, q8, q9 +; CHECK-NEXT:	vmov	d19, r2, r3 +; CHECK-NEXT:	vmov	d18, r0, r1 +; CHECK-NEXT:	vbsl	q8, q9, q10 +; CHECK-NEXT:	vmov	r0, r1, d16 +; CHECK-NEXT:	vmov	r2, r3, d17 +; CHECK-NEXT:	sub	sp, r11, #8 +; CHECK-NEXT:	pop	{r4, r10, r11, lr} +; CHECK-NEXT:	mov	pc, lr  ; CHECK-NEXT:    .p2align 3  ; CHECK-NEXT:  @ BB#1:  ; CHECK-NEXT:  .LCPI23_0:  | 
