diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
commit | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch) | |
tree | 5343938942df402b49ec7300a1c25a2d4ccd5821 /test/CodeGen/ARM | |
parent | 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff) |
Notes
Diffstat (limited to 'test/CodeGen/ARM')
89 files changed, 6908 insertions, 416 deletions
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll index 52cc37e240845..b8f2980be7502 100644 --- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll +++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll @@ -12,11 +12,11 @@ ; CHECK: bl _quux ; CHECK-NOT: bl _quux -; NOMERGE: bl _baz -; NOMERGE: bl _baz +; NOMERGE-DAG: bl _baz +; NOMERGE-DAG: bl _baz -; NOMERGE: bl _quux -; NOMERGE: bl _quux +; NOMERGE-DAG: bl _quux +; NOMERGE-DAG: bl _quux ; ModuleID = 'tail.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll index 5d59fc64d9222..e5c2fb4d67a1b 100644 --- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll +++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll @@ -1,5 +1,4 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s ; PR4091 define void @foo(i32 %i, i32* %p) nounwind { diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index 4a1341c4d6e71..2a5af6199a345 100644 --- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -12,13 +12,14 @@ define void @test_byval_8_bytes_alignment(i32 %i, ...) 
{ entry: ; CHECK: sub sp, sp, #12 ; CHECK: sub sp, sp, #4 -; CHECK: stmib sp, {r1, r2, r3} +; CHECK: add r0, sp, #4 +; CHECK: stm sp, {r0, r1, r2, r3} %g = alloca i8* %g1 = bitcast i8** %g to i8* call void @llvm.va_start(i8* %g1) ; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 -; CHECK: bfc [[REG]], #0, #3 +; CHECK: bic [[REG]], [[REG]], #7 %0 = va_arg i8** %g, double call void @llvm.va_end(i8* %g1) diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 5c0853cfaab45..66d9033a6d7cb 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -1,10 +1,135 @@ # RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | + define void @test_zext_s1() { ret void } + define void @test_sext_s1() { ret void } + define void @test_sext_s8() { ret void } + define void @test_zext_s16() { ret void } + define void @test_add_s8() { ret void } define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } + define void @test_fadd_s32() #0 { ret void } + define void @test_fadd_s64() #0 { ret void } + define void @test_load_from_stack() { ret void } + define void @test_load_f32() #0 { ret void } + define void @test_load_f64() #0 { ret void } + + define void @test_stores() #0 { ret void } + + define void @test_gep() { ret void } + define void @test_constants() { ret void } + + define void @test_soft_fp_double() #0 { ret void } + + attributes #0 = { "target-features"="+vfp2,-neonfp" } +... 
+--- +name: test_zext_s1 +# CHECK-LABEL: name: test_zext_s1 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s1) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = G_ZEXT %0(s1) + ; CHECK: [[VREGEXT:%[0-9]+]] = ANDri [[VREGX]], 1, 14, _, _ + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sext_s1 +# CHECK-LABEL: name: test_sext_s1 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s1) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = G_SEXT %0(s1) + ; CHECK: [[VREGAND:%[0-9]+]] = ANDri [[VREGX]], 1, 14, _, _ + ; CHECK: [[VREGEXT:%[0-9]+]] = RSBri [[VREGAND]], 0, 14, _, _ + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sext_s8 +# CHECK-LABEL: name: test_sext_s8 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s8) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = G_SEXT %0(s8) + ; CHECK: [[VREGEXT:%[0-9]+]] = SXTB [[VREGX]], 0, 14, _ + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... 
+--- +name: test_zext_s16 +# CHECK-LABEL: name: test_zext_s16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s16) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = G_ZEXT %0(s16) + ; CHECK: [[VREGEXT:%[0-9]+]] = UXTH [[VREGX]], 0, 14, _ + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_add_s8 @@ -106,6 +231,72 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- +name: test_fadd_s32 +# CHECK-LABEL: name: test_fadd_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } +# CHECK: id: 0, class: spr +# CHECK: id: 1, class: spr +# CHECK: id: 2, class: spr +body: | + bb.0: + liveins: %s0, %s1 + + %0(s32) = COPY %s0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 + + %1(s32) = COPY %s1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 + + %2(s32) = G_FADD %0, %1 + ; CHECK: [[VREGSUM:%[0-9]+]] = VADDS [[VREGX]], [[VREGY]], 14, _ + + %s0 = COPY %2(s32) + ; CHECK: %s0 = COPY [[VREGSUM]] + + BX_RET 14, _, implicit %s0 + ; CHECK: BX_RET 14, _, implicit %s0 +... 
+--- +name: test_fadd_s64 +# CHECK-LABEL: name: test_fadd_s64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } +# CHECK: id: 0, class: dpr +# CHECK: id: 1, class: dpr +# CHECK: id: 2, class: dpr +body: | + bb.0: + liveins: %d0, %d1 + + %0(s64) = COPY %d0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 + + %1(s64) = COPY %d1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 + + %2(s64) = G_FADD %0, %1 + ; CHECK: [[VREGSUM:%[0-9]+]] = VADDD [[VREGX]], [[VREGY]], 14, _ + + %d0 = COPY %2(s64) + ; CHECK: %d0 = COPY [[VREGSUM]] + + BX_RET 14, _, implicit %d0 + ; CHECK: BX_RET 14, _, implicit %d0 +... +--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack legalized: true @@ -122,20 +313,225 @@ registers: # CHECK-DAG: id: 2, class: gpr # CHECK-DAG: id: 3, class: gpr fixedStack: - - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } + - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false } -# CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8 +# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0 +# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8 body: | bb.0: liveins: %r0, %r1, %r2, %r3 %0(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[FIVREG:%[0-9]+]] = ADDri %fixed-stack.[[FRAME_INDEX]], 0, 14, _, _ + ; CHECK: [[FI32VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI32]], 0, 14, _, _ + + %1(s32) = G_LOAD %0(p0) :: (load 4) + ; CHECK: [[LD32VREG:%[0-9]+]] = LDRi12 [[FI32VREG]], 0, 14, _ + + %r0 = COPY %1 + ; CHECK: %r0 = COPY [[LD32VREG]] + + %2(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[FI1VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI1]], 0, 14, _, _ - %1(s32) = G_LOAD %0(p0) - ; CHECK: {{%[0-9]+}} = LDRi12 [[FIVREG]], 0, 14, _ + %3(s1) = G_LOAD %2(p0) :: 
(load 1) + ; CHECK: [[LD1VREG:%[0-9]+]] = LDRBi12 [[FI1VREG]], 0, 14, _ + + %r0 = COPY %3 + ; CHECK: %r0 = COPY [[LD1VREG]] BX_RET 14, _ ; CHECK: BX_RET 14, _ ... +--- +name: test_load_f32 +# CHECK-LABEL: name: test_load_f32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: fprb } +# CHECK-DAG: id: [[P:[0-9]+]], class: gpr +# CHECK-DAG: id: [[V:[0-9]+]], class: spr +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + %0(p0) = COPY %r0 + + %1(s32) = G_LOAD %0(p0) :: (load 4) + ; CHECK: %[[V]] = VLDRS %[[P]], 0, 14, _ + + %s0 = COPY %1 + ; CHECK: %s0 = COPY %[[V]] + + BX_RET 14, _, implicit %s0 + ; CHECK: BX_RET 14, _, implicit %s0 +... +--- +name: test_load_f64 +# CHECK-LABEL: name: test_load_f64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: fprb } +# CHECK-DAG: id: [[P:[0-9]+]], class: gpr +# CHECK-DAG: id: [[V:[0-9]+]], class: dpr +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + %0(p0) = COPY %r0 + + %1(s64) = G_LOAD %0(p0) :: (load 8) + ; CHECK: %[[V]] = VLDRD %[[P]], 0, 14, _ + + %d0 = COPY %1 + ; CHECK: %d0 = COPY %[[V]] + + BX_RET 14, _, implicit %d0 + ; CHECK: BX_RET 14, _, implicit %d0 +... 
+--- +name: test_stores +# CHECK-LABEL: name: test_stores +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: fprb } + - { id: 5, class: fprb } +# CHECK: id: [[P:[0-9]+]], class: gpr +# CHECK: id: [[I8:[0-9]+]], class: gpr +# CHECK: id: [[I16:[0-9]+]], class: gpr +# CHECK: id: [[I32:[0-9]+]], class: gpr +# CHECK: id: [[F32:[0-9]+]], class: spr +# CHECK: id: [[F64:[0-9]+]], class: dpr +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + %0(p0) = COPY %r0 + %1(s8) = COPY %r3 + %2(s16) = COPY %r2 + %3(s32) = COPY %r1 + %4(s32) = COPY %s0 + %5(s64) = COPY %d2 + + G_STORE %1(s8), %0(p0) :: (store 1) + ; CHECK: STRBi12 %[[I8]], %[[P]], 0, 14, _ + + G_STORE %2(s16), %0(p0) :: (store 2) + ; CHECK: STRH %[[I16]], %[[P]], _, 0, 14, _ + + G_STORE %3(s32), %0(p0) :: (store 4) + ; CHECK: STRi12 %[[I32]], %[[P]], 0, 14, _ + + G_STORE %4(s32), %0(p0) :: (store 4) + ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14, _ + + G_STORE %5(s64), %0(p0) :: (store 8) + ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14, _ + + BX_RET 14, _ +... +--- +name: test_gep +# CHECK-LABEL: name: test_gep +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: [[PTR:[0-9]+]], class: gpr +# CHECK: id: [[OFF:[0-9]+]], class: gpr +# CHECK: id: [[GEP:[0-9]+]], class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(p0) = COPY %r0 + %1(s32) = COPY %r1 + + %2(p0) = G_GEP %0, %1(s32) + ; CHECK: %[[GEP]] = ADDrr %[[PTR]], %[[OFF]], 14, _, _ + + %r0 = COPY %2(p0) + BX_RET 14, _, implicit %r0 +... 
+--- +name: test_constants +# CHECK-LABEL: name: test_constants +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } +# CHECK: id: [[C:[0-9]+]], class: gpr +body: | + bb.0: + %0(s32) = G_CONSTANT 42 + ; CHECK: %[[C]] = MOVi 42, 14, _, _ + + %r0 = COPY %0(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_soft_fp_double +# CHECK-LABEL: name: test_soft_fp_double +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: fprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +# CHECK-DAG: id: {{[0-9]+}}, class: gpr +# CHECK-DAG: id: {{[0-9]+}}, class: gpr +# CHECK-DAG: id: {{[0-9]+}}, class: gpr +# CHECK-DAG: id: {{[0-9]+}}, class: gpr +# CHECK-DAG: id: [[DREG:[0-9]+]], class: dpr +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + %0(s32) = COPY %r2 + ; CHECK: [[IN1:%[0-9]+]] = COPY %r2 + + %1(s32) = COPY %r3 + ; CHECK: [[IN2:%[0-9]+]] = COPY %r3 + + %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 1 + ; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]] + + %3(s32) = G_EXTRACT %2(s64), 0 + %4(s32) = G_EXTRACT %2(s64), 32 + ; CHECK: [[OUT1:%[0-9]+]] = VGETLNi32 %[[DREG]], 0 + ; CHECK: [[OUT2:%[0-9]+]] = VGETLNi32 %[[DREG]], 1 + + %r0 = COPY %3 + ; CHECK: %r0 = COPY [[OUT1]] + + %r1 = COPY %4 + ; CHECK: %r1 = COPY [[OUT2]] + + BX_RET 14, _, implicit %r0, implicit %r1 + ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index f863ed5a6849b..a7f5ec33bee3c 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple arm-unknown -global-isel -stop-after=irtranslator %s -o - | FileCheck %s +; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG define void @test_void_return() { ; CHECK-LABEL: name: test_void_return @@ -7,6 +8,20 @@ entry: ret void } +define signext i1 @test_add_i1(i1 %x, i1 %y) { +; CHECK-LABEL: name: test_add_i1 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s1) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s1) = COPY %r1 +; CHECK: [[SUM:%[0-9]+]](s1) = G_ADD [[VREGX]], [[VREGY]] +; CHECK: [[EXT:%[0-9]+]](s32) = G_SEXT [[SUM]] +; CHECK: %r0 = COPY [[EXT]](s32) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %sum = add i1 %x, %y + ret i1 %sum +} + define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK-LABEL: name: test_add_i8 ; CHECK: liveins: %r0, %r1 @@ -20,6 +35,17 @@ entry: ret i8 %sum } +define signext i8 @test_return_sext_i8(i8 %x) { +; CHECK-LABEL: name: test_return_sext_i8 +; CHECK: liveins: %r0 +; CHECK: [[VREG:%[0-9]+]](s8) = COPY %r0 +; CHECK: [[VREGEXT:%[0-9]+]](s32) = G_SEXT [[VREG]] +; CHECK: %r0 = COPY [[VREGEXT]](s32) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + ret i8 %x +} + define i16 @test_add_i16(i16 %x, i16 %y) { ; CHECK-LABEL: name: test_add_i16 ; CHECK: liveins: %r0, %r1 @@ -33,6 +59,17 @@ entry: ret i16 %sum } +define zeroext i16 @test_return_zext_i16(i16 %x) { +; CHECK-LABEL: name: test_return_zext_i16 +; CHECK: liveins: %r0 +; CHECK: [[VREG:%[0-9]+]](s16) = COPY %r0 +; CHECK: [[VREGEXT:%[0-9]+]](s32) = 
G_ZEXT [[VREG]] +; CHECK: %r0 = COPY [[VREGEXT]](s32) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + ret i16 %x +} + define i32 @test_add_i32(i32 %x, i32 %y) { ; CHECK-LABEL: name: test_add_i32 ; CHECK: liveins: %r0, %r1 @@ -46,8 +83,8 @@ entry: ret i32 %sum } -define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { -; CHECK-LABEL: name: test_many_args +define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { +; CHECK-LABEL: name: test_stack_args ; CHECK: fixedStack: ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4 @@ -62,3 +99,527 @@ entry: %sum = add i32 %p2, %p5 ret i32 %sum } + +define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, + i8 signext %p4, i16 signext %p5) { +; CHECK-LABEL: name: test_stack_args_signext +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 +; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1 +; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]](p0) +; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]] +; CHECK: %r0 = COPY [[SUM]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %sum = add i16 %p1, %p5 + ret i16 %sum +} + +define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, + i8 zeroext %p4, i16 zeroext %p5) { +; CHECK-LABEL: name: test_stack_args_zeroext +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 +; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2 +; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]] +; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = G_LOAD [[FIP4]](p0) +; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]] +; CHECK: %r0 = COPY [[SUM]] +; 
CHECK: BX_RET 14, _, implicit %r0 +entry: + %sum = add i8 %p2, %p4 + ret i8 %sum +} + +define i16 @test_ptr_arg(i16* %p) { +; CHECK-LABEL: name: test_ptr_arg +; CHECK: liveins: %r0 +; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0 +; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD [[VREGP]](p0) +entry: + %v = load i16, i16* %p + ret i16 %v +} + +define i32* @test_ptr_ret(i32** %p) { +; Test pointer returns and pointer-to-pointer arguments +; CHECK-LABEL: name: test_ptr_ret +; CHECK: liveins: %r0 +; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0 +; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0) +; CHECK: %r0 = COPY [[VREGV]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %v = load i32*, i32** %p + ret i32* %v +} + +define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) { +; CHECK-LABEL: name: test_ptr_arg_on_stack +; CHECK: fixedStack: +; CHECK: id: [[P:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[FIP:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P]] +; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0) +; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0) +; CHECK: %r0 = COPY [[VREGV]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %v = load i32, i32* %p + ret i32 %v +} + +define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2, + float %p3, float %p4, float %p5) { +; CHECK-LABEL: name: test_float_aapcscc +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 +; CHECK-DAG: id: [[P5:[0-9]+]]{{.*}}offset: 4{{.*}}size: 4 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0) +; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGP5]] +; CHECK: %r0 = COPY [[VREGV]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %v = fadd float %p1, %p5 + ret float %v +} + +define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float 
%p1, float %p2, + float %p3, float %p4, float %p5, + float %ridiculous, + float %number, + float %of, + float %parameters, + float %that, + float %should, + float %never, + float %exist, + float %in, + float %practice, + float %q0, float %q1) { +; CHECK-LABEL: name: test_float_vfpcc +; CHECK: fixedStack: +; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 +; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 4{{.*}}size: 4 +; CHECK: liveins: %s0, %s1, %s2, %s3, %s4, %s5, %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15 +; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %s1 +; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] +; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0) +; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGQ1]] +; CHECK: %s0 = COPY [[VREGV]] +; CHECK: BX_RET 14, _, implicit %s0 +entry: + %v = fadd float %p1, %q1 + ret float %v +} + +define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double %p2, + double %p3, double %p4, double %p5, + double %reasonable, + double %parameters, + double %q0, double %q1) { +; CHECK-LABEL: name: test_double_vfpcc +; CHECK: fixedStack: +; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 +; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8 +; CHECK: liveins: %d0, %d1, %d2, %d3, %d4, %d5, %d6, %d7 +; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d1 +; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] +; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0) +; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]] +; CHECK: %d0 = COPY [[VREGV]] +; CHECK: BX_RET 14, _, implicit %d0 +entry: + %v = fadd double %p1, %q1 + ret double %v +} + +define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p2, + double %p3, double %p4, double %p5) { +; CHECK-LABEL: name: test_double_aapcscc +; CHECK: fixedStack: +; CHECK-DAG: id: [[P2:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 +; CHECK-DAG: id: [[P3:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8 +; 
CHECK-DAG: id: [[P4:[0-9]+]]{{.*}}offset: 16{{.*}}size: 8 +; CHECK-DAG: id: [[P5:[0-9]+]]{{.*}}offset: 24{{.*}}size: 8 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2 +; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3 +; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32 +; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32 +; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0) +; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]] +; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 +; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 +; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; CHECK-DAG: %r0 = COPY [[VREGVLO]] +; CHECK-DAG: %r1 = COPY [[VREGVHI]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %v = fadd double %p1, %p5 + ret double %v +} + +define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler, + double %p1, double %p2, + double %p3, double %p4, + double %reasonable, + double %parameters, + double %q0, double %q1) { +; CHECK-LABEL: name: test_double_gap_vfpcc +; CHECK: fixedStack: +; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 +; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8 +; CHECK: liveins: %d0, %d2, %d3, %d4, %d5, %d6, %d7, %s2 +; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d2 +; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] +; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0) +; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]] +; CHECK: %d0 = COPY [[VREGV]] +; CHECK: BX_RET 14, _, implicit %d0 +entry: + %v = fadd double %p1, %q1 + ret double %v +} + +define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0, + double %p1) { +; CHECK-LABEL: name: 
test_double_gap_aapcscc +; CHECK: fixedStack: +; CHECK-DAG: id: [[P1:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 +; CHECK: liveins: %r0, %r2, %r3 +; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2 +; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3 +; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32 +; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32 +; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] +; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0) +; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] +; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 +; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 +; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; CHECK-DAG: %r0 = COPY [[VREGVLO]] +; CHECK-DAG: %r1 = COPY [[VREGVHI]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %v = fadd double %p0, %p1 + ret double %v +} + +define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler, + double %p1) { +; CHECK-LABEL: name: test_double_gap2_aapcscc +; CHECK: fixedStack: +; CHECK-DAG: id: [[P1:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 +; CHECK: liveins: %r0, %r1, %r2 +; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1 +; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32 +; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32 +; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] +; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0) +; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] +; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 +; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 +; BIG: 
[[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; CHECK-DAG: %r0 = COPY [[VREGVLO]] +; CHECK-DAG: %r1 = COPY [[VREGVHI]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %v = fadd double %p0, %p1 + ret double %v +} + +define arm_aapcscc void @test_indirect_call(void() *%fptr) { +; CHECK-LABEL: name: test_indirect_call +; CHECK: [[FPTR:%[0-9]+]](p0) = COPY %r0 +; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BLX [[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +entry: + notail call arm_aapcscc void %fptr() + ret void +} + +declare arm_aapcscc void @call_target() + +define arm_aapcscc void @test_direct_call() { +; CHECK-LABEL: name: test_direct_call +; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BLX @call_target, csr_aapcs, implicit-def %lr, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +entry: + notail call arm_aapcscc void @call_target() + ret void +} + +declare arm_aapcscc i32* @simple_reg_params_target(i32, i32*) + +define arm_aapcscc i32* @test_call_simple_reg_params(i32 *%a, i32 %b) { +; CHECK-LABEL: name: test_call_simple_reg_params +; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0 +; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1 +; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %r0 = COPY [[BVREG]] +; CHECK-DAG: %r1 = COPY [[AVREG]] +; CHECK: BLX @simple_reg_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0 +; CHECK: [[RVREG:%[0-9]+]](p0) = COPY %r0 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %r0 = COPY [[RVREG]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %r = notail call arm_aapcscc i32 *@simple_reg_params_target(i32 %b, i32 *%a) + ret i32 *%r +} + +declare arm_aapcscc i32* @simple_stack_params_target(i32, i32*, i32, i32*, i32, i32*) + +define 
arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { +; CHECK-LABEL: name: test_call_simple_stack_params +; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0 +; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1 +; CHECK: ADJCALLSTACKDOWN 8, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %r0 = COPY [[BVREG]] +; CHECK-DAG: %r1 = COPY [[AVREG]] +; CHECK-DAG: %r2 = COPY [[BVREG]] +; CHECK-DAG: %r3 = COPY [[AVREG]] +; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32) +; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 4 +; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32) +; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4 +; CHECK: BLX @simple_stack_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 +; CHECK: [[RVREG:%[0-9]+]](p0) = COPY %r0 +; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %r0 = COPY [[RVREG]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %r = notail call arm_aapcscc i32 *@simple_stack_params_target(i32 %b, i32 *%a, i32 %b, i32 *%a, i32 %b, i32 *%a) + ret i32 *%r +} + +declare arm_aapcscc signext i16 @ext_target(i8 signext, i8 zeroext, i16 signext, i16 zeroext, i8 signext, i8 zeroext, i16 signext, i16 zeroext, i1 zeroext) + +define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { +; CHECK-LABEL: name: test_call_ext_params +; CHECK-DAG: [[AVREG:%[0-9]+]](s8) = COPY %r0 +; CHECK-DAG: [[BVREG:%[0-9]+]](s16) = COPY %r1 +; CHECK-DAG: [[CVREG:%[0-9]+]](s1) = COPY %r2 +; CHECK: ADJCALLSTACKDOWN 20, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[SEXTA:%[0-9]+]](s32) = G_SEXT [[AVREG]](s8) +; CHECK: %r0 = COPY [[SEXTA]] +; CHECK: [[ZEXTA:%[0-9]+]](s32) = G_ZEXT [[AVREG]](s8) +; CHECK: %r1 = COPY [[ZEXTA]] +; 
CHECK: [[SEXTB:%[0-9]+]](s32) = G_SEXT [[BVREG]](s16) +; CHECK: %r2 = COPY [[SEXTB]] +; CHECK: [[ZEXTB:%[0-9]+]](s32) = G_ZEXT [[BVREG]](s16) +; CHECK: %r3 = COPY [[ZEXTB]] +; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32) +; CHECK: [[SEXTA2:%[0-9]+]](s32) = G_SEXT [[AVREG]] +; CHECK: G_STORE [[SEXTA2]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 4 +; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32) +; CHECK: [[ZEXTA2:%[0-9]+]](s32) = G_ZEXT [[AVREG]] +; CHECK: G_STORE [[ZEXTA2]](s32), [[FI2]](p0){{.*}}store 4 +; CHECK: [[SP3:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF3:%[0-9]+]](s32) = G_CONSTANT i32 8 +; CHECK: [[FI3:%[0-9]+]](p0) = G_GEP [[SP3]], [[OFF3]](s32) +; CHECK: [[SEXTB2:%[0-9]+]](s32) = G_SEXT [[BVREG]] +; CHECK: G_STORE [[SEXTB2]](s32), [[FI3]](p0){{.*}}store 4 +; CHECK: [[SP4:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF4:%[0-9]+]](s32) = G_CONSTANT i32 12 +; CHECK: [[FI4:%[0-9]+]](p0) = G_GEP [[SP4]], [[OFF4]](s32) +; CHECK: [[ZEXTB2:%[0-9]+]](s32) = G_ZEXT [[BVREG]] +; CHECK: G_STORE [[ZEXTB2]](s32), [[FI4]](p0){{.*}}store 4 +; CHECK: [[SP5:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF5:%[0-9]+]](s32) = G_CONSTANT i32 16 +; CHECK: [[FI5:%[0-9]+]](p0) = G_GEP [[SP5]], [[OFF5]](s32) +; CHECK: [[ZEXTC:%[0-9]+]](s32) = G_ZEXT [[CVREG]] +; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store 4 +; CHECK: BLX @ext_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 +; CHECK: [[RVREG:%[0-9]+]](s16) = COPY %r0 +; CHECK: ADJCALLSTACKUP 20, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[RExtVREG:%[0-9]+]](s32) = G_SEXT [[RVREG]] +; CHECK: %r0 = COPY [[RExtVREG]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %r = notail call arm_aapcscc signext i16 @ext_target(i8 signext %a, i8 zeroext %a, i16 signext %b, i16 
zeroext %b, i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i1 zeroext %c) + ret i16 %r +} + +declare arm_aapcs_vfpcc double @vfpcc_fp_target(float, double) + +define arm_aapcs_vfpcc double @test_call_vfpcc_fp_params(double %a, float %b) { +; CHECK-LABEL: name: test_call_vfpcc_fp_params +; CHECK-DAG: [[AVREG:%[0-9]+]](s64) = COPY %d0 +; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %s2 +; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %s0 = COPY [[BVREG]] +; CHECK-DAG: %d1 = COPY [[AVREG]] +; CHECK: BLX @vfpcc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %d1, implicit-def %d0 +; CHECK: [[RVREG:%[0-9]+]](s64) = COPY %d0 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %d0 = COPY [[RVREG]] +; CHECK: BX_RET 14, _, implicit %d0 +entry: + %r = notail call arm_aapcs_vfpcc double @vfpcc_fp_target(float %b, double %a) + ret double %r +} + +declare arm_aapcscc double @aapcscc_fp_target(float, double, float, double) + +define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { +; CHECK-LABEL: name: test_call_aapcs_fp_params +; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1 +; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A1]](s32), 0, [[A2]](s32), 32 +; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A2]](s32), 0, [[A1]](s32), 32 +; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2 +; CHECK: ADJCALLSTACKDOWN 16, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %r0 = COPY [[BVREG]] +; CHECK-DAG: [[A1:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 0 +; CHECK-DAG: [[A2:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 32 +; LITTLE-DAG: %r2 = COPY [[A1]] +; LITTLE-DAG: %r3 = COPY [[A2]] +; BIG-DAG: %r2 = COPY [[A2]] +; BIG-DAG: %r3 = COPY [[A1]] +; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32) +; CHECK: G_STORE [[BVREG]](s32), 
[[FI1]](p0){{.*}}store 4 +; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 8 +; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32) +; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store 8 +; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1 +; LITTLE: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R1]](s32), 0, [[R2]](s32), 32 +; BIG: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R2]](s32), 0, [[R1]](s32), 32 +; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 0 +; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 32 +; LITTLE-DAG: %r0 = COPY [[R1]] +; LITTLE-DAG: %r1 = COPY [[R2]] +; BIG-DAG: %r0 = COPY [[R2]] +; BIG-DAG: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %r = notail call arm_aapcscc double @aapcscc_fp_target(float %b, double %a, float %b, double %a) + ret double %r +} + +declare arm_aapcscc float @different_call_conv_target(float) + +define arm_aapcs_vfpcc float @test_call_different_call_conv(float %x) { +; CHECK-LABEL: name: test_call_different_call_conv +; CHECK: [[X:%[0-9]+]](s32) = COPY %s0 +; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %r0 = COPY [[X]] +; CHECK: BLX @different_call_conv_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit-def %r0 +; CHECK: [[R:%[0-9]+]](s32) = COPY %r0 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %s0 = COPY [[R]] +; CHECK: BX_RET 14, _, implicit %s0 +entry: + %r = notail call arm_aapcscc float @different_call_conv_target(float %x) + ret float %r +} + +define i32 @test_shufflevector_s32_v2s32(i32 %arg) { +; CHECK-LABEL: name: test_shufflevector_s32_v2s32 +; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0 +; 
CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32) +; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) + %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 + %shuffle = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer + %res = extractelement <2 x i32> %shuffle, i32 0 + ret i32 %res +} + +define i32 @test_shufflevector_v2s32_v3s32(i32 %arg1, i32 %arg2) { +; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32 +; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 +; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32) +; CHECK-DAG: [[V1:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) +; CHECK-DAG: [[V2:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) +; CHECK: [[VEC:%[0-9]+]](<3 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<3 x s32>) +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>) + %v1 = insertelement <2 x i32> undef, i32 %arg1, i32 0 + %v2 = insertelement <2 x i32> %v1, i32 %arg2, i32 1 + %shuffle = shufflevector <2 x i32> %v2, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1> + %res = extractelement <3 x i32> %shuffle, i32 0 + ret i32 %res +} + + +define i32 @test_shufflevector_v2s32_v4s32(i32 %arg1, i32 %arg2) { +; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32 +; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 +; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[C1:%[0-9]+]](s32) = 
G_CONSTANT i32 1 +; CHECK-DAG: [[MASK:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32), [[C0]](s32), [[C0]](s32) +; CHECK-DAG: [[V1:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) +; CHECK-DAG: [[V2:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) +; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<4 x s32>) +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<4 x s32>) + %v1 = insertelement <2 x i32> undef, i32 %arg1, i32 0 + %v2 = insertelement <2 x i32> %v1, i32 %arg2, i32 1 + %shuffle = shufflevector <2 x i32> %v2, <2 x i32> undef, <4 x i32> zeroinitializer + %res = extractelement <4 x i32> %shuffle, i32 0 + ret i32 %res +} + +define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { +; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32 +; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[ARG3:%[0-9]+]](s32) = COPY %r2 +; CHECK: [[ARG4:%[0-9]+]](s32) = COPY %r3 +; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 +; CHECK-DAG: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 +; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32) +; CHECK-DAG: [[V1:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) +; CHECK-DAG: [[V2:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) +; CHECK-DAG: [[V3:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V2]], [[ARG3]](s32), [[C2]](s32) +; CHECK-DAG: [[V4:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V3]], [[ARG4]](s32), [[C3]](s32) +; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[V4]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) + %v1 = insertelement <4 x i32> undef, 
i32 %arg1, i32 0 + %v2 = insertelement <4 x i32> %v1, i32 %arg2, i32 1 + %v3 = insertelement <4 x i32> %v2, i32 %arg3, i32 2 + %v4 = insertelement <4 x i32> %v3, i32 %arg4, i32 3 + %shuffle = shufflevector <4 x i32> %v4, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %res = extractelement <2 x i32> %shuffle, i32 0 + ret i32 %res +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll b/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll new file mode 100644 index 0000000000000..7d021fdb43dd9 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple arm-linux-gnueabihf -mattr=+vfp2 -float-abi=hard -global-isel %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD +; RUN: llc -mtriple arm-linux-gnueabi -mattr=+vfp2,+soft-float -float-abi=soft -global-isel %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT-AEABI +; RUN: llc -mtriple arm-linux-gnu- -mattr=+vfp2,+soft-float -float-abi=soft -global-isel %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT-DEFAULT + +define arm_aapcscc float @test_frem_float(float %x, float %y) { +; CHECK-LABEL: test_frem_float: +; CHECK: blx fmodf + %r = frem float %x, %y + ret float %r +} + +define arm_aapcscc double @test_frem_double(double %x, double %y) { +; CHECK-LABEL: test_frem_double: +; CHECK: blx fmod + %r = frem double %x, %y + ret double %r +} + +declare float @llvm.pow.f32(float %x, float %y) +define arm_aapcscc float @test_fpow_float(float %x, float %y) { +; CHECK-LABEL: test_fpow_float: +; CHECK: blx powf + %r = call float @llvm.pow.f32(float %x, float %y) + ret float %r +} + +declare double @llvm.pow.f64(double %x, double %y) +define arm_aapcscc double @test_fpow_double(double %x, double %y) { +; CHECK-LABEL: test_fpow_double: +; CHECK: blx pow + %r = call double @llvm.pow.f64(double %x, double %y) + ret double %r +} + +define arm_aapcscc float @test_add_float(float %x, float %y) { +; CHECK-LABEL: test_add_float: +; HARD: vadd.f32 +; SOFT-AEABI: blx 
__aeabi_fadd +; SOFT-DEFAULT: blx __addsf3 + %r = fadd float %x, %y + ret float %r +} + +define arm_aapcscc double @test_add_double(double %x, double %y) { +; CHECK-LABEL: test_add_double: +; HARD: vadd.f64 +; SOFT-AEABI: blx __aeabi_dadd +; SOFT-DEFAULT: blx __adddf3 + %r = fadd double %x, %y + ret double %r +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 3f01b6dd3a830..236dcbeb84c52 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple arm-unknown -global-isel %s -o - | FileCheck %s +; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel %s -o - | FileCheck %s define void @test_void_return() { ; CHECK-LABEL: test_void_return: @@ -7,6 +7,39 @@ entry: ret void } +define zeroext i1 @test_zext_i1(i1 %x) { +; CHECK-LABEL: test_zext_i1 +; CHECK: and r0, r0, #1 +; CHECK: bx lr +entry: + ret i1 %x +} + +define signext i1 @test_sext_i1(i1 %x) { +; CHECK-LABEL: test_sext_i1 +; CHECK: and r0, r0, #1 +; CHECK: rsb r0, r0, #0 +; CHECK: bx lr +entry: + ret i1 %x +} + +define zeroext i8 @test_ext_i8(i8 %x) { +; CHECK-LABEL: test_ext_i8: +; CHECK: uxtb r0, r0 +; CHECK: bx lr +entry: + ret i8 %x +} + +define signext i16 @test_ext_i16(i16 %x) { +; CHECK-LABEL: test_ext_i16: +; CHECK: sxth r0, r0 +; CHECK: bx lr +entry: + ret i16 %x +} + define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK-LABEL: test_add_i8: ; CHECK: add r0, r0, r1 @@ -34,8 +67,8 @@ entry: ret i32 %sum } -define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { -; CHECK-LABEL: test_many_args: +define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { +; CHECK-LABEL: test_stack_args_i32: ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 ; CHECK: ldr [[P5:r[0-9]+]], {{.*}}[[P5ADDR]] ; CHECK: add r0, r2, [[P5]] @@ -44,3 +77,108 @@ entry: %sum = add i32 %p2, %p5 ret i32 %sum } + +define i16 @test_stack_args_mixed(i32 %p0, i16 %p1, i8 %p2, 
i1 %p3, i8 %p4, i16 %p5) { +; CHECK-LABEL: test_stack_args_mixed: +; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 +; CHECK: ldrh [[P5:r[0-9]+]], {{.*}}[[P5ADDR]] +; CHECK: add r0, r1, [[P5]] +; CHECK: bx lr +entry: + %sum = add i16 %p1, %p5 + ret i16 %sum +} + +define i16 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i16 zeroext %p4) { +; CHECK-LABEL: test_stack_args_zeroext: +; CHECK: mov [[P4ADDR:r[0-9]+]], sp +; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]] +; CHECK: add r0, r1, [[P4]] +; CHECK: bx lr +entry: + %sum = add i16 %p1, %p4 + ret i16 %sum +} + +define i8 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 signext %p4) { +; CHECK-LABEL: test_stack_args_signext: +; CHECK: mov [[P4ADDR:r[0-9]+]], sp +; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]] +; CHECK: add r0, r2, [[P4]] +; CHECK: bx lr +entry: + %sum = add i8 %p2, %p4 + ret i8 %sum +} + +define i32 @test_ptr_arg_in_reg(i32* %p) { +; CHECK-LABEL: test_ptr_arg_in_reg: +; CHECK: ldr r0, [r0] +; CHECK: bx lr +entry: + %v = load i32, i32* %p + ret i32 %v +} + +define i32 @test_ptr_arg_on_stack(i32 %f0, i32 %f1, i32 %f2, i32 %f3, i32* %p) { +; CHECK-LABEL: test_ptr_arg_on_stack: +; CHECK: mov r0, sp +; CHECK: ldr r0, [r0] +; CHECK: ldr r0, [r0] +; CHECK: bx lr +entry: + %v = load i32, i32* %p + ret i32 %v +} + +define i8* @test_ptr_ret(i8** %p) { +; CHECK-LABEL: test_ptr_ret: +; CHECK: ldr r0, [r0] +; CHECK: bx lr +entry: + %v = load i8*, i8** %p + ret i8* %v +} + +define arm_aapcs_vfpcc float @test_float_hard(float %f0, float %f1) { +; CHECK-LABEL: test_float_hard: +; CHECK: vadd.f32 s0, s0, s1 +; CHECK: bx lr +entry: + %v = fadd float %f0, %f1 + ret float %v +} + +define arm_aapcscc float @test_float_softfp(float %f0, float %f1) { +; CHECK-LABEL: test_float_softfp: +; CHECK-DAG: vmov [[F0:s[0-9]+]], r0 +; CHECK-DAG: vmov [[F1:s[0-9]+]], r1 +; CHECK: vadd.f32 [[FV:s[0-9]+]], [[F0]], [[F1]] +; CHECK: vmov r0, [[FV]] +; CHECK: bx lr +entry: + %v = fadd float %f0, %f1 + ret float %v +} + 
+define arm_aapcs_vfpcc double @test_double_hard(double %f0, double %f1) { +; CHECK-LABEL: test_double_hard: +; CHECK: vadd.f64 d0, d0, d1 +; CHECK: bx lr +entry: + %v = fadd double %f0, %f1 + ret double %v +} + +define arm_aapcscc double @test_double_softfp(double %f0, double %f1) { +; CHECK-LABEL: test_double_softfp: +; CHECK-DAG: vmov [[F0:d[0-9]+]], r0, r1 +; CHECK-DAG: vmov [[F1:d[0-9]+]], r2, r3 +; CHECK: vadd.f64 [[FV:d[0-9]+]], [[F0]], [[F1]] +; CHECK: vmov.32 r0, [[FV]][0] +; CHECK: vmov.32 r1, [[FV]][1] +; CHECK: bx lr +entry: + %v = fadd double %f0, %f1 + ret double %v +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir new file mode 100644 index 0000000000000..d154b4887c195 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -0,0 +1,282 @@ +# RUN: llc -mtriple arm-linux-gnueabihf -mattr=+vfp2 -float-abi=hard -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD +# RUN: llc -mtriple arm-linux-gnueabi -mattr=+vfp2,+soft-float -float-abi=soft -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix SOFT-AEABI +# RUN: llc -mtriple arm-linux-gnu -mattr=+soft-float -float-abi=soft -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix SOFT-DEFAULT +--- | + define void @test_frem_float() { ret void } + define void @test_frem_double() { ret void } + + define void @test_fpow_float() { ret void } + define void @test_fpow_double() { ret void } + + define void @test_fadd_float() { ret void } + define void @test_fadd_double() { ret void } +... 
+--- +name: test_frem_float +# CHECK-LABEL: name: test_frem_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; HARD-DAG: %s0 = COPY [[X]] + ; HARD-DAG: %s1 = COPY [[Y]] + ; SOFT: BLX $fmodf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; HARD: BLX $fmodf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 + ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0 + ; HARD: [[R:%[0-9]+]](s32) = COPY %s0 + ; CHECK: ADJCALLSTACKUP + %2(s32) = G_FREM %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_frem_double +# CHECK-LABEL: name: test_frem_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + ; The inputs may be in the wrong order (depending on the target's + ; endianness), but that's orthogonal to what we're trying to test here. + ; For soft float, we only need to check that the first value, received + ; through R0-R1, ends up in R0-R1 or R1-R0, and the second value, received + ; through R2-R3, ends up in R2-R3 or R3-R2, when passed to fmod. + ; For hard float, the values need to end up in D0 and D1. 
+ ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + %3(s32) = COPY %r3 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] + %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 + %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; CHECK: ADJCALLSTACKDOWN + ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] + ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] + ; HARD-DAG: %d0 = COPY [[X]] + ; HARD-DAG: %d1 = COPY [[Y]] + ; SOFT: BLX $fmod, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 + ; CHECK: ADJCALLSTACKUP + %6(s64) = G_FREM %4, %5 + %7(s32) = G_EXTRACT %6(s64), 0 + %8(s32) = G_EXTRACT %6(s64), 32 + %r0 = COPY %7(s32) + %r1 = COPY %8(s32) + BX_RET 14, _, implicit %r0, implicit %r1 +... 
+--- +name: test_fpow_float +# CHECK-LABEL: name: test_fpow_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; HARD-DAG: %s0 = COPY [[X]] + ; HARD-DAG: %s1 = COPY [[Y]] + ; SOFT: BLX $powf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; HARD: BLX $powf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 + ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0 + ; HARD: [[R:%[0-9]+]](s32) = COPY %s0 + ; CHECK: ADJCALLSTACKUP + %2(s32) = G_FPOW %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_fpow_double +# CHECK-LABEL: name: test_fpow_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + ; The inputs may be in the wrong order (depending on the target's + ; endianness), but that's orthogonal to what we're trying to test here. + ; For soft float, we only need to check that the first value, received + ; through R0-R1, ends up in R0-R1 or R1-R0, and the second value, received + ; through R2-R3, ends up in R2-R3 or R3-R2, when passed to pow. + ; For hard float, the values need to end up in D0 and D1. 
+ ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + %3(s32) = COPY %r3 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] + %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 + %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; CHECK: ADJCALLSTACKDOWN + ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] + ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] + ; HARD-DAG: %d0 = COPY [[X]] + ; HARD-DAG: %d1 = COPY [[Y]] + ; SOFT: BLX $pow, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 + ; CHECK: ADJCALLSTACKUP + %6(s64) = G_FPOW %4, %5 + %7(s32) = G_EXTRACT %6(s64), 0 + %8(s32) = G_EXTRACT %6(s64), 32 + %r0 = COPY %7(s32) + %r1 = COPY %8(s32) + BX_RET 14, _, implicit %r0, implicit %r1 +... 
+--- +name: test_fadd_float +# CHECK-LABEL: name: test_fadd_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; HARD: [[R:%[0-9]+]](s32) = G_FADD [[X]], [[Y]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; SOFT-AEABI: BLX $__aeabi_fadd, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BLX $__addsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + %2(s32) = G_FADD %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_fadd_double +# CHECK-LABEL: name: test_fadd_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + %3(s32) = COPY %r3 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] + %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 + %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] + ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] 
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] + ; SOFT-AEABI: BLX $__aeabi_dadd, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT: ADJCALLSTACKUP + %6(s64) = G_FADD %4, %5 + ; HARD-DAG: G_EXTRACT [[R]](s64), 0 + ; HARD-DAG: G_EXTRACT [[R]](s64), 32 + %7(s32) = G_EXTRACT %6(s64), 0 + %8(s32) = G_EXTRACT %6(s64), 32 + %r0 = COPY %7(s32) + %r1 = COPY %8(s32) + BX_RET 14, _, implicit %r0, implicit %r1 +... diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index 98d71c09e63ba..cbff7e12fb77c 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -1,10 +1,68 @@ # RUN: llc -mtriple arm-- -global-isel -run-pass=legalizer %s -o - | FileCheck %s --- | + define void @test_sext_s8() { ret void } + define void @test_zext_s16() { ret void } + define void @test_add_s8() { ret void } define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } define void @test_load_from_stack() { ret void } + define void @test_legal_loads() #0 { ret void } + define void @test_legal_stores() #0 { ret void } + + define void @test_gep() { ret void } + + define void @test_constants() { ret void } + + define void @test_fadd_s32() #0 { ret void } + define void @test_fadd_s64() #0 { ret void } + + attributes #0 = { "target-features"="+vfp2" } +... 
+--- +name: test_sext_s8 +# CHECK-LABEL: name: test_sext_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s8) = COPY %r0 + %1(s32) = G_SEXT %0 + ; G_SEXT with s8 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_SEXT {{%[0-9]+}} + %r0 = COPY %1(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_zext_s16 +# CHECK-LABEL: name: test_zext_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s16) = COPY %r0 + %1(s32) = G_ZEXT %0 + ; G_ZEXT with s16 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_ZEXT {{%[0-9]+}} + %r0 = COPY %1(s32) + BX_RET 14, _, implicit %r0 ... --- name: test_add_s8 @@ -104,8 +162,179 @@ body: | ; This is legal, so we should find it unchanged in the output ; CHECK: [[FIVREG:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FRAME_INDEX]] - ; CHECK: {{%[0-9]+}}(s32) = G_LOAD [[FIVREG]](p0) + ; CHECK: {{%[0-9]+}}(s32) = G_LOAD [[FIVREG]](p0) :: (load 4) %0(p0) = G_FRAME_INDEX %fixed-stack.2 - %1(s32) = G_LOAD %0(p0) + %1(s32) = G_LOAD %0(p0) :: (load 4) + BX_RET 14, _ +... 
+--- +name: test_legal_loads +# CHECK-LABEL: name: test_legal_loads +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + ; These are all legal, so we should find them unchanged in the output + ; CHECK-DAG: {{%[0-9]+}}(s64) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}(s32) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}(s16) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}(s8) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}(s1) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}(p0) = G_LOAD %0 + %0(p0) = COPY %r0 + %1(s32) = G_LOAD %0(p0) :: (load 4) + %2(s16) = G_LOAD %0(p0) :: (load 2) + %3(s8) = G_LOAD %0(p0) :: (load 1) + %4(s1) = G_LOAD %0(p0) :: (load 1) + %5(p0) = G_LOAD %0(p0) :: (load 4) + %6(s64) = G_LOAD %0(p0) :: (load 8) + BX_RET 14, _ +... 
+--- +name: test_legal_stores +# CHECK-LABEL: name: test_legal_stores +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3, %r4, %r5, %r6, %d1 + + ; These are all legal, so we should find them unchanged in the output + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s64), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s32), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s16), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s8), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s1), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(p0), %0(p0) + %0(p0) = COPY %r0 + %1(s64) = COPY %d1 + G_STORE %1(s64), %0(p0) :: (store 8) + %2(s32) = COPY %r2 + G_STORE %2(s32), %0(p0) :: (store 4) + %3(s16) = COPY %r3 + G_STORE %3(s16), %0(p0) :: (store 2) + %4(s8) = COPY %r4 + G_STORE %4(s8), %0(p0) :: (store 1) + %5(s1) = COPY %r5 + G_STORE %5(s1), %0(p0) :: (store 1) + %6(p0) = COPY %r6 + G_STORE %6(p0), %0(p0) :: (store 4) BX_RET 14, _ ... +--- +name: test_gep +# CHECK-LABEL: name: test_gep +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(p0) = COPY %r0 + %1(s32) = COPY %r1 + + ; CHECK: {{%[0-9]+}}(p0) = G_GEP {{%[0-9]+}}, {{%[0-9]+}}(s32) + %2(p0) = G_GEP %0, %1(s32) + + %r0 = COPY %2(p0) + BX_RET 14, _, implicit %r0 +... 
+--- +name: test_constants +# CHECK-LABEL: name: test_constants +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } +body: | + bb.0: + %0(s32) = G_CONSTANT 42 + ; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT 42 + + %r0 = COPY %0(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_fadd_s32 +# CHECK-LABEL: name: test_fadd_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_FADD %0, %1 + ; G_FADD with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_FADD {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_fadd_s64 +# CHECK-LABEL: name: test_fadd_s64 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %d0, %d1 + + %0(s64) = COPY %d0 + %1(s64) = COPY %d1 + %2(s64) = G_FADD %0, %1 + ; G_FADD with s64 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s64) = G_FADD {{%[0-9]+, %[0-9]+}} + %d0 = COPY %2(s64) + BX_RET 14, _, implicit %d0 + +... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index ce0601021e62f..fbf8d81322f8f 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -3,6 +3,23 @@ define void @test_add_s32() { ret void } define void @test_add_s16() { ret void } define void @test_add_s8() { ret void } + define void @test_add_s1() { ret void } + + define void @test_loads() #0 { ret void } + define void @test_stores() #0 { ret void } + + define void @test_stack() { ret void } + + define void @test_gep() { ret void } + + define void @test_constants() { ret void } + + define void @test_fadd_s32() #0 { ret void } + define void @test_fadd_s64() #0 { ret void } + + define void @test_soft_fp_s64() #0 { ret void } + + attributes #0 = { "target-features"="+vfp2"} ... --- name: test_add_s32 @@ -82,3 +99,266 @@ body: | BX_RET 14, _, implicit %r0 ... +--- +name: test_add_s1 +# CHECK-LABEL: name: test_add_s1 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s1) = COPY %r0 + %1(s1) = COPY %r1 + %2(s1) = G_ADD %0, %1 + %r0 = COPY %2(s1) + BX_RET 14, _, implicit %r0 + +... 
+--- +name: test_loads +# CHECK-LABEL: name: test_loads +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 3, class: gprb } +# CHECK: - { id: 4, class: gprb } +# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 6, class: fprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } +body: | + bb.0: + liveins: %r0 + %0(p0) = COPY %r0 + %6(s64) = G_LOAD %0 :: (load 8) + %1(s32) = G_LOAD %0 :: (load 4) + %2(s16) = G_LOAD %0 :: (load 2) + %3(s8) = G_LOAD %0 :: (load 1) + %4(s1) = G_LOAD %0 :: (load 1) + %5(p0) = G_LOAD %0 :: (load 4) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_stores +# CHECK-LABEL: name: test_stores +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 3, class: gprb } +# CHECK: - { id: 4, class: gprb } +# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 6, class: fprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3, %r4, %r5, %d6 + %0(p0) = COPY %r0 + %1(s32) = COPY %r1 + G_STORE %1(s32), %0 :: (store 4) + %2(s16) = COPY %r2 + G_STORE %2(s16), %0 :: (store 2) + %3(s8) = COPY %r3 + G_STORE %3(s8), %0 :: (store 1) + %4(s1) = COPY %r4 + G_STORE %4(s1), %0 :: (store 1) + %5(p0) = COPY %r5 + G_STORE %5(p0), %0 :: (store 4) + %6(s64) = COPY %d6 + G_STORE %6(s64), %0 :: (store 8) + BX_RET 14, _, implicit %r0 + +... 
+--- +name: test_stack +# CHECK-LABEL: name: test_stack +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 3, class: gprb } +# CHECK: - { id: 4, class: gprb } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +fixedStack: + - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } +body: | + bb.0: + %0(p0) = G_FRAME_INDEX %fixed-stack.0 + %1(s32) = G_LOAD %0(p0) :: (load 4 from %fixed-stack.0, align 0) + + %2(p0) = COPY %sp + %3(s32) = G_CONSTANT i32 8 + %4(p0) = G_GEP %2, %3(s32) + G_STORE %1(s32), %4(p0) :: (store 4) + + BX_RET 14, _ + +... +--- +name: test_gep +# CHECK-LABEL: name: test_gep +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(p0) = COPY %r0 + %1(s32) = COPY %r1 + %2(p0) = G_GEP %0, %1(s32) + %r0 = COPY %2(p0) + BX_RET 14, _, implicit %r0 +... +--- +name: test_constants +# CHECK-LABEL: name: test_constants +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +registers: + - { id: 0, class: _ } +body: | + bb.0: + %0(s32) = G_CONSTANT 42 + %r0 = COPY %0(s32) + BX_RET 14, _, implicit %r0 +... 
+--- +name: test_fadd_s32 +# CHECK-LABEL: name: test_fadd_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb } +# CHECK: - { id: 1, class: fprb } +# CHECK: - { id: 2, class: fprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %s0, %s1 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + %2(s32) = G_FADD %0, %1 + %s0 = COPY %2(s32) + BX_RET 14, _, implicit %s0 + +... +--- +name: test_fadd_s64 +# CHECK-LABEL: name: test_fadd_s64 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb } +# CHECK: - { id: 1, class: fprb } +# CHECK: - { id: 2, class: fprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %d0, %d1 + + %0(s64) = COPY %d0 + %1(s64) = COPY %d1 + %2(s64) = G_FADD %0, %1 + %d0 = COPY %2(s64) + BX_RET 14, _, implicit %d0 + +... +--- +name: test_soft_fp_s64 +# CHECK-LABEL: name: test_soft_fp_s64 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 2, class: fprb } +# CHECK: - { id: 3, class: gprb } +# CHECK: - { id: 4, class: gprb } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 + %3(s32) = G_EXTRACT %2(s64), 0 + %4(s32) = G_EXTRACT %2(s64), 32 + %r0 = COPY %3(s32) + %r1 = COPY %4(s32) + BX_RET 14, _, implicit %r0, implicit %r1 + +... 
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll index 7d37c83d74838..0e077b3aee5a1 100644 --- a/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s ; rdar://12713765 ; When realign-stack is set to false, make sure we are not creating stack @@ -8,29 +7,31 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; NO-REALIGN-LABEL: test1 -; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48 -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] - -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: mov r[[R3:[0-9]+]], r[[R1]] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]! -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128] - -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! 
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] +; CHECK-LABEL: test1 +; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] +; CHECK: add r[[R2:[0-9]+]], r1, #48 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -41,32 +42,33 @@ entry: define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; REALIGN-LABEL: test2 -; REALIGN: bfc sp, #0, #6 -; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48 -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! 
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] - -; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] - %retval = alloca <16 x float>, align 16 +%retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval %1 = load <16 x float>, <16 x float>* %retval diff --git a/test/CodeGen/ARM/arg-copy-elide.ll b/test/CodeGen/ARM/arg-copy-elide.ll new file mode 100644 index 0000000000000..739b560b0833f --- /dev/null +++ b/test/CodeGen/ARM/arg-copy-elide.ll @@ -0,0 +1,61 @@ +; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s + +declare arm_aapcscc void @addrof_i32(i32*) +declare arm_aapcscc void @addrof_i64(i64*) + +define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret void +} + +; CHECK-LABEL: simple: +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #8 +; CHECK: bl addrof_i32 +; CHECK: pop {r11, pc} + + +; We need to load %x before calling addrof_i32 now because it could mutate %x in +; place. 
+ +define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) { +entry: + %x.addr = alloca i32 + store i32 %x, i32* %x.addr + call void @addrof_i32(i32* %x.addr) + ret i32 %x +} + +; CHECK-LABEL: use_arg: +; CHECK: push {[[csr:[^ ]*]], lr} +; CHECK: ldr [[csr]], [sp, #8] +; CHECK: add r0, sp, #8 +; CHECK: bl addrof_i32 +; CHECK: mov r0, [[csr]] +; CHECK: pop {[[csr]], pc} + + +define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) { +entry: + %x.addr = alloca i64, align 4 + store i64 %x, i64* %x.addr, align 4 + call void @addrof_i64(i64* %x.addr) + ret i64 %x +} + +; CHECK-LABEL: split_i64: +; CHECK: push {r4, r5, r11, lr} +; CHECK: sub sp, sp, #8 +; CHECK: ldr r4, [sp, #28] +; CHECK: ldr r5, [sp, #24] +; CHECK: mov r0, sp +; CHECK: str r4, [sp, #4] +; CHECK: str r5, [sp] +; CHECK: bl addrof_i64 +; CHECK: mov r0, r5 +; CHECK: mov r1, r4 +; CHECK: add sp, sp, #8 +; CHECK: pop {r4, r5, r11, pc} diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 9bd2077e4d037..31691e9468c9e 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -1,7 +1,6 @@ ; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s ; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s -; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ -; RUN: | FileCheck -check-prefix=T2 %s +; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck -check-prefix=T2 %s ; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. 
@@ -49,9 +48,9 @@ tailrecurse.switch: ; preds = %tailrecurse ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: bne -; V8-NEXT: b -; The trailing space in the last line checks that the branch is unconditional +; V8-NEXT: beq +; V8-NEXT: %sw.epilog +; V8-NEXT: bx lr switch i32 %and, label %sw.epilog [ i32 1, label %sw.bb i32 3, label %sw.bb6 @@ -93,7 +92,7 @@ entry: %1 = load i8, i8* %0, align 1 %2 = zext i8 %1 to i32 ; ARM: ands -; THUMB: ands +; THUMB: ands ; T2: ands ; V8: ands ; V8-NEXT: beq @@ -141,19 +140,48 @@ return: ; preds = %bb2, %bb, %entry ; folding of unrelated tests (in this case, a TST against r1 was eliminated in ; favour of an AND of r0). +define i32 @test_tst_assessment(i32 %a, i32 %b) { ; ARM-LABEL: test_tst_assessment: +; ARM: @ BB#0: +; ARM-NEXT: and r0, r0, #1 +; ARM-NEXT: tst r1, #1 +; ARM-NEXT: subne r0, r0, #1 +; ARM-NEXT: mov pc, lr +; ; THUMB-LABEL: test_tst_assessment: +; THUMB: @ BB#0: +; THUMB-NEXT: movs r2, r0 +; THUMB-NEXT: movs r0, #1 +; THUMB-NEXT: ands r0, r2 +; THUMB-NEXT: subs r2, r0, #1 +; THUMB-NEXT: lsls r1, r1, #31 +; THUMB-NEXT: beq .LBB2_2 +; THUMB-NEXT: @ BB#1: +; THUMB-NEXT: movs r0, r2 +; THUMB-NEXT: .LBB2_2: +; THUMB-NEXT: bx lr +; ; T2-LABEL: test_tst_assessment: +; T2: @ BB#0: +; T2-NEXT: lsls r1, r1, #31 +; T2-NEXT: and r0, r0, #1 +; T2-NEXT: it ne +; T2-NEXT: subne r0, #1 +; T2-NEXT: bx lr +; ; V8-LABEL: test_tst_assessment: -define i32 @test_tst_assessment(i1 %lhs, i1 %rhs) { - %lhs32 = zext i1 %lhs to i32 - %rhs32 = zext i1 %rhs to i32 - %diff = sub nsw i32 %lhs32, %rhs32 -; ARM: tst r1, #1 -; THUMB: lsls r1, r1, #31 -; T2: lsls r1, r1, #31 -; V8: lsls r1, r1, #31 - ret i32 %diff +; V8: @ BB#0: +; V8-NEXT: lsls r1, r1, #31 +; V8-NEXT: and r0, r0, #1 +; V8-NEXT: it ne +; V8-NEXT: subne r0, #1 +; V8-NEXT: bx lr + %and1 = and i32 %a, 1 + %sub = sub i32 %and1, 1 + %and2 = and i32 %b, 1 + %cmp = icmp eq i32 %and2, 0 + %sel = select i1 %cmp, i32 %and1, i32 %sub + ret i32 %sel } !1 = 
!{!"branch_weights", i32 1, i32 1, i32 3, i32 2 } diff --git a/test/CodeGen/ARM/arm-position-independence.ll b/test/CodeGen/ARM/arm-position-independence.ll index 02a63984ad6f4..4aa817f7a4814 100644 --- a/test/CodeGen/ARM/arm-position-independence.ll +++ b/test/CodeGen/ARM/arm-position-independence.ll @@ -13,6 +13,12 @@ ; RUN: llc -relocation-model=rwpi -mtriple=thumbv6m--none-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1_RO_ABS --check-prefix=THUMB1_RW_SB ; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1_RO_PC --check-prefix=THUMB1_RW_SB +; RUN: llc -relocation-model=rwpi -mtriple=armv7a--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_ARM_RO_ABS --check-prefix=NO_MOVT_ARM_RW_SB +; RUN: llc -relocation-model=ropi-rwpi -mtriple=armv7a--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_ARM_RO_PC --check-prefix=NO_MOVT_ARM_RW_SB + +; RUN: llc -relocation-model=rwpi -mtriple=thumbv7m--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_THUMB2_RO_ABS --check-prefix=NO_MOVT_THUMB2_RW_SB +; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_THUMB2_RO_PC --check-prefix=NO_MOVT_THUMB2_RW_SB + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @a = external global i32, align 4 @@ -28,16 +34,24 @@ entry: ; ARM_RW_ABS: movt r[[REG]], :upper16:a ; ARM_RW_ABS: ldr r0, [r[[REG]]] -; ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; ARM_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel) +; ARM_RW_SB: movt r[[REG]], :upper16:a(sbrel) ; ARM_RW_SB: ldr r0, [r9, r[[REG]]] +; NO_MOVT_ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RW_SB: ldr r0, [r9, r[[REG]]] + ; THUMB2_RW_ABS: movw r[[REG:[0-9]]], :lower16:a ; 
THUMB2_RW_ABS: movt r[[REG]], :upper16:a ; THUMB2_RW_ABS: ldr r0, [r[[REG]]] -; THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; THUMB2_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel) +; THUMB2_RW_SB: movt r[[REG]], :upper16:a(sbrel) ; THUMB2_RW_SB: ldr.w r0, [r9, r[[REG]]] +; NO_MOVT_THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RW_SB: ldr.w r0, [r9, r[[REG]]] + ; THUMB1_RW_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] ; THUMB1_RW_ABS: ldr r0, [r[[REG]]] @@ -47,11 +61,11 @@ entry: ; CHECK: {{(bx lr|pop)}} -; ARM_RW_SB: [[LCPI]] -; ARM_RW_SB: .long a(sbrel) +; NO_MOVT_ARM_RW_SB: [[LCPI]] +; NO_MOVT_ARM_RW_SB: .long a(sbrel) -; THUMB2_RW_SB: [[LCPI]] -; THUMB2_RW_SB: .long a(sbrel) +; NO_MOVT_THUMB2_RW_SB: [[LCPI]] +; NO_MOVT_THUMB2_RW_SB: .long a(sbrel) ; THUMB1_RW_ABS: [[LCPI]] ; THUMB1_RW_ABS-NEXT: .long a @@ -70,16 +84,24 @@ entry: ; ARM_RW_ABS: movt r[[REG]], :upper16:a ; ARM_RW_ABS: str r0, [r[[REG:[0-9]]]] -; ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] -; ARM_RW_SB: str r0, [r9, r[[REG]]] +; ARM_RW_SB: movw r[[REG:[0-9]]], :lower16:a +; ARM_RW_SB: movt r[[REG]], :upper16:a +; ARM_RW_SB: str r0, [r9, r[[REG:[0-9]]]] + +; NO_MOVT_ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RW_SB: str r0, [r9, r[[REG]]] ; THUMB2_RW_ABS: movw r[[REG:[0-9]]], :lower16:a ; THUMB2_RW_ABS: movt r[[REG]], :upper16:a ; THUMB2_RW_ABS: str r0, [r[[REG]]] -; THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; THUMB2_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel) +; THUMB2_RW_SB: movt r[[REG]], :upper16:a(sbrel) ; THUMB2_RW_SB: str.w r0, [r9, r[[REG]]] +; NO_MOVT_THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RW_SB: str.w r0, [r9, r[[REG]]] + ; THUMB1_RW_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] ; THUMB1_RW_ABS: str r0, [r[[REG]]] @@ -89,11 +111,11 @@ entry: ; CHECK: {{(bx lr|pop)}} -; ARM_RW_SB: [[LCPI]] -; ARM_RW_SB: .long a(sbrel) +; 
NO_MOVT_ARM_RW_SB: [[LCPI]] +; NO_MOVT_ARM_RW_SB: .long a(sbrel) -; THUMB2_RW_SB: [[LCPI]] -; THUMB2_RW_SB: .long a(sbrel) +; NO_MOVT_THUMB2_RW_SB: [[LCPI]] +; NO_MOVT_THUMB2_RW_SB: .long a(sbrel) ; THUMB1_RW_ABS: [[LCPI]] ; THUMB1_RW_ABS-NEXT: .long a @@ -112,21 +134,37 @@ entry: ; ARM_RO_ABS: movt r[[reg]], :upper16:b ; ARM_RO_ABS: ldr r0, [r[[reg]]] +; NO_MOVT_ARM_RO_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RO_ABS: ldr r0, [r[[REG]]] + ; ARM_RO_PC: movw r[[REG:[0-9]]], :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+8)) ; ARM_RO_PC: movt r[[REG]], :upper16:(b-([[LPC]]+8)) ; ARM_RO_PC: [[LPC]]: ; ARM_RO_PC-NEXT: ldr r0, [pc, r[[REG]]] +; NO_MOVT_ARM_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]: +; NO_MOVT_ARM_RO_PC: ldr r0, [pc, r[[REG]]] + ; THUMB2_RO_ABS: movw r[[REG:[0-9]]], :lower16:b ; THUMB2_RO_ABS: movt r[[REG]], :upper16:b ; THUMB2_RO_ABS: ldr r0, [r[[REG]]] +; NO_MOVT_THUMB2_RO_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RO_ABS: ldr r0, [r[[REG]]] + ; THUMB2_RO_PC: movw r[[REG:[0-9]]], :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+4)) ; THUMB2_RO_PC: movt r[[REG]], :upper16:(b-([[LPC]]+4)) ; THUMB2_RO_PC: [[LPC]]: ; THUMB2_RO_PC-NEXT: add r[[REG]], pc ; THUMB2_RO_PC: ldr r0, [r[[REG]]] +; NO_MOVT_THUMB2_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]: +; NO_MOVT_THUMB2_RO_PC-NEXT: add r[[REG]], pc +; NO_MOVT_THUMB2_RO_PC: ldr r0, [r[[REG]]] + + ; THUMB1_RO_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] ; THUMB1_RO_ABS: ldr r0, [r[[REG]]] @@ -137,9 +175,21 @@ entry: ; CHECK: {{(bx lr|pop)}} +; NO_MOVT_ARM_RO_ABS: [[LCPI]] +; NO_MOVT_ARM_RO_ABS-NEXT: .long b + +; NO_MOVT_THUMB2_RO_ABS: [[LCPI]] +; NO_MOVT_THUMB2_RO_ABS-NEXT: .long b + ; THUMB1_RO_ABS: [[LCPI]] ; THUMB1_RO_ABS-NEXT: .long b +; NO_MOVT_ARM_RO_PC: [[LCPI]] +; NO_MOVT_ARM_RO_PC-NEXT: .long b-([[LPC]]+8) + +; NO_MOVT_THUMB2_RO_PC: 
[[LCPI]] +; NO_MOVT_THUMB2_RO_PC-NEXT: .long b-([[LPC]]+4) + ; THUMB1_RO_PC: [[LCPI]] ; THUMB1_RO_PC-NEXT: .long b-([[LPC]]+4) } @@ -152,15 +202,23 @@ entry: ; ARM_RW_ABS: movw r[[REG:[0-9]]], :lower16:a ; ARM_RW_ABS: movt r[[REG]], :upper16:a -; ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; ARM_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel) +; ARM_RW_SB: movt r[[REG]], :upper16:a(sbrel) ; ARM_RW_SB: add r0, r9, r[[REG]] +; NO_MOVT_ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RW_SB: add r0, r9, r[[REG]] + ; THUMB2_RW_ABS: movw r[[REG:[0-9]]], :lower16:a ; THUMB2_RW_ABS: movt r[[REG]], :upper16:a -; THUMB2_RW_SB: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] +; THUMB2_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel) +; THUMB2_RW_SB: movt r[[REG]], :upper16:a(sbrel) ; THUMB2_RW_SB: add r0, r9 +; NO_MOVT_THUMB2_RW_SB: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RW_SB: add r0, r9 + ; THUMB1_RW_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] ; THUMB1_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] @@ -169,11 +227,11 @@ entry: ; CHECK: {{(bx lr|pop)}} -; ARM_RW_SB: [[LCPI]] -; ARM_RW_SB: .long a(sbrel) +; NO_MOVT_ARM_RW_SB: [[LCPI]] +; NO_MOVT_ARM_RW_SB: .long a(sbrel) -; THUMB2_RW_SB: [[LCPI]] -; THUMB2_RW_SB: .long a(sbrel) +; NO_MOVT_THUMB2_RW_SB: [[LCPI]] +; NO_MOVT_THUMB2_RW_SB: .long a(sbrel) ; THUMB1_RW_ABS: [[LCPI]] ; THUMB1_RW_ABS-NEXT: .long a @@ -190,19 +248,31 @@ entry: ; ARM_RO_ABS: movw r[[REG:[0-9]]], :lower16:b ; ARM_RO_ABS: movt r[[REG]], :upper16:b +; NO_MOVT_ARM_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] + ; ARM_RO_PC: movw r[[REG:[0-9]]], :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+8)) ; ARM_RO_PC: movt r[[REG]], :upper16:(b-([[LPC]]+8)) ; ARM_RO_PC: [[LPC]]: ; ARM_RO_PC-NEXT: add r0, pc, r[[REG:[0-9]]] +; NO_MOVT_ARM_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]: +; NO_MOVT_ARM_RO_PC-NEXT: add r0, pc, r[[REG]] + ; THUMB2_RO_ABS: movw r[[REG:[0-9]]], 
:lower16:b ; THUMB2_RO_ABS: movt r[[REG]], :upper16:b +; NO_MOVT_THUMB2_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] + ; THUMB2_RO_PC: movw r0, :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+4)) ; THUMB2_RO_PC: movt r0, :upper16:(b-([[LPC]]+4)) ; THUMB2_RO_PC: [[LPC]]: ; THUMB2_RO_PC-NEXT: add r0, pc +; NO_MOVT_THUMB2_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]: +; NO_MOVT_THUMB2_RO_PC-NEXT: add r[[REG]], pc + ; THUMB1_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] ; THUMB1_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] @@ -211,9 +281,21 @@ entry: ; CHECK: {{(bx lr|pop)}} +; NO_MOVT_ARM_RO_ABS: [[LCPI]] +; NO_MOVT_ARM_RO_ABS-NEXT: .long b + +; NO_MOVT_THUMB2_RO_ABS: [[LCPI]] +; NO_MOVT_THUMB2_RO_ABS-NEXT: .long b + ; THUMB1_RO_ABS: [[LCPI]] ; THUMB1_RO_ABS-NEXT: .long b +; NO_MOVT_ARM_RO_PC: [[LCPI]] +; NO_MOVT_ARM_RO_PC-NEXT: .long b-([[LPC]]+8) + +; NO_MOVT_THUMB2_RO_PC: [[LCPI]] +; NO_MOVT_THUMB2_RO_PC-NEXT: .long b-([[LPC]]+4) + ; THUMB1_RO_PC: [[LCPI]] ; THUMB1_RO_PC-NEXT: .long b-([[LPC]]+4) } @@ -226,19 +308,31 @@ entry: ; ARM_RO_ABS: movw r[[REG:[0-9]]], :lower16:take_addr_func ; ARM_RO_ABS: movt r[[REG]], :upper16:take_addr_func +; NO_MOVT_ARM_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] + ; ARM_RO_PC: movw r[[REG:[0-9]]], :lower16:(take_addr_func-([[LPC:.LPC[0-9]+_[0-9]+]]+8)) ; ARM_RO_PC: movt r[[REG]], :upper16:(take_addr_func-([[LPC]]+8)) ; ARM_RO_PC: [[LPC]]: ; ARM_RO_PC-NEXT: add r0, pc, r[[REG:[0-9]]] +; NO_MOVT_ARM_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_ARM_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]: +; NO_MOVT_ARM_RO_PC-NEXT: add r0, pc, r[[REG]] + ; THUMB2_RO_ABS: movw r[[REG:[0-9]]], :lower16:take_addr_func ; THUMB2_RO_ABS: movt r[[REG]], :upper16:take_addr_func +; NO_MOVT_THUMB2_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] + ; THUMB2_RO_PC: movw r0, :lower16:(take_addr_func-([[LPC:.LPC[0-9]+_[0-9]+]]+4)) ; THUMB2_RO_PC: movt r0, :upper16:(take_addr_func-([[LPC]]+4)) ; 
THUMB2_RO_PC: [[LPC]]: ; THUMB2_RO_PC-NEXT: add r0, pc +; NO_MOVT_THUMB2_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] +; NO_MOVT_THUMB2_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]: +; NO_MOVT_THUMB2_RO_PC-NEXT: add r[[REG]], pc + ; THUMB1_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]] ; THUMB1_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]] @@ -247,9 +341,21 @@ entry: ; CHECK: {{(bx lr|pop)}} +; NO_MOVT_ARM_RO_ABS: [[LCPI]] +; NO_MOVT_ARM_RO_ABS-NEXT: .long take_addr_func + +; NO_MOVT_THUMB2_RO_ABS: [[LCPI]] +; NO_MOVT_THUMB2_RO_ABS-NEXT: .long take_addr_func + ; THUMB1_RO_ABS: [[LCPI]] ; THUMB1_RO_ABS-NEXT: .long take_addr_func +; NO_MOVT_ARM_RO_PC: [[LCPI]] +; NO_MOVT_ARM_RO_PC-NEXT: .long take_addr_func-([[LPC]]+8) + +; NO_MOVT_THUMB2_RO_PC: [[LCPI]] +; NO_MOVT_THUMB2_RO_PC-NEXT: .long take_addr_func-([[LPC]]+4) + ; THUMB1_RO_PC: [[LCPI]] ; THUMB1_RO_PC-NEXT: .long take_addr_func-([[LPC]]+4) } diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll index 364bd5d13691e..e026bae361e19 100644 --- a/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -24,14 +24,12 @@ entry: ; CHECK-THUMB-LABEL: test_cmpxchg_res_i8 ; CHECK-THUMB: bl __sync_val_compare_and_swap_1 ; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0 -; CHECK-THUMB: push {r0} -; CHECK-THUMB: pop {[[R1:r[0-7]]]} +; CHECK-THUMB: movs [[R1:r[0-7]]], r0 ; CHECK-THUMB: movs r0, #1 ; CHECK-THUMB: movs [[R2:r[0-9]+]], #0 ; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}} ; CHECK-THUMB: beq -; CHECK-THUMB: push {[[R2]]} -; CHECK-THUMB: pop {r0} +; CHECK-THUMB: movs r0, [[R2]] ; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV6-NEXT: .fnstart @@ -66,14 +64,14 @@ entry: ; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]: ; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1 +; CHECK-ARMV7-NEXT: moveq r0, #1 ; CHECK-ARMV7-NEXT: bxeq lr ; CHECK-ARMV7-NEXT: [[TRY]]: -; CHECK-ARMV7-NEXT: ldrexb 
[[LD:r[0-9]+]], [r0] -; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] +; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0] +; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 ; CHECK-ARMV7-NEXT: beq [[HEAD]] ; CHECK-ARMV7-NEXT: clrex -; CHECK-ARMV7-NEXT: mov [[RES]], #0 +; CHECK-ARMV7-NEXT: mov r0, #0 ; CHECK-ARMV7-NEXT: bx lr ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index e6a4949d53ce4..23c4ccea46046 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -320,10 +320,10 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) { ; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] ; CHECK: cmp [[SUCCESS]], #0 ; CHECK: bne [[LOOP_BB]] -; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]] +; CHECK: dmb ish +; CHECK: bx lr ; CHECK: [[FAIL_BB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[END_BB]]: ; CHECK: dmb ish ; CHECK: bx lr diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll index 77b850bd617b8..d1575ed12e4e1 100644 --- a/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -1045,20 +1045,21 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind ; function there. ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-ARM: mov r0, r[[OLD]] +; CHECK-ARM-NEXT: bx lr ret i8 %old } @@ -1078,20 +1079,21 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw ; function there. 
; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-ARM: mov r0, r[[OLD]] +; CHECK-ARM-NEXT: bx lr ret i16 %old } @@ -1110,20 +1112,21 @@ define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLD]], r0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK: str{{(.w)?}} r[[OLD]], +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: str{{(.w)?}} r[[OLD]], +; CHECK-ARM-NEXT: bx lr ret void } @@ -1148,16 +1151,16 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 ; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r2, r3 is a reasonable guess. 
; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] +; CHECK-NEXT: pop ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll index 893fef3add7e1..31eff16fcc3c4 100644 --- a/test/CodeGen/ARM/bfi.ll +++ b/test/CodeGen/ARM/bfi.ll @@ -77,7 +77,7 @@ entry: define i32 @f7(i32 %x, i32 %y) { ; CHECK-LABEL: f7: -; CHECK: bfi r1, r0, #4, #1 +; CHECK: bfi r0, r2, #4, #1 %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 %or = or i32 %y2, 16 @@ -88,8 +88,8 @@ define i32 @f7(i32 %x, i32 %y) { define i32 @f8(i32 %x, i32 %y) { ; CHECK-LABEL: f8: -; CHECK: bfi r1, r0, #4, #1 -; CHECK: bfi r1, r0, #5, #1 +; CHECK: bfi r0, r2, #4, #1 +; CHECK: bfi r0, r2, #5, #1 %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 %or = or i32 %y2, 48 @@ -111,7 +111,7 @@ define i32 @f9(i32 %x, i32 %y) { define i32 @f10(i32 %x, i32 %y) { ; CHECK-LABEL: f10: -; CHECK: bfi r1, r0, #4, #2 +; CHECK: bfi r0, r2, #4, #2 %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 %or = or i32 %y2, 32 @@ -128,7 +128,7 @@ define i32 @f10(i32 %x, i32 %y) { define i32 @f11(i32 %x, i32 %y) { ; CHECK-LABEL: f11: -; CHECK: bfi r1, r0, #4, #3 +; CHECK: bfi r0, r2, #4, #3 %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 %or = or i32 %y2, 32 @@ -150,7 +150,7 @@ define i32 @f11(i32 %x, i32 %y) { define i32 @f12(i32 %x, i32 %y) { ; CHECK-LABEL: f12: -; CHECK: bfi r1, r0, #4, #1 +; CHECK: bfi r0, r2, #4, #1 %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 %or = or i32 %y2, 16 diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll index 691f8be4ab66b..8be59898bd0fe 100644 --- a/test/CodeGen/ARM/bic.ll +++ b/test/CodeGen/ARM/bic.ll @@ -1,17 +1,24 @@ ; RUN: llc 
-mtriple=arm-eabi %s -o - | FileCheck %s define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: bic r0, r0, r1 %tmp = xor i32 %b, 4294967295 %tmp1 = and i32 %a, %tmp ret i32 %tmp1 } -; CHECK: bic r0, r0, r1 - define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: bic r0, r0, r1 %tmp = xor i32 %b, 4294967295 %tmp1 = and i32 %tmp, %a ret i32 %tmp1 } -; CHECK: bic r0, r0, r1 +define i32 @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: bic r0, r0, #255 + %tmp = and i32 %a, -256 + ret i32 %tmp +} diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll new file mode 100644 index 0000000000000..fe43f1b2ef93d --- /dev/null +++ b/test/CodeGen/ARM/bool-ext-inc.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=neon | FileCheck %s + +define i32 @sext_inc(i1 zeroext %x) { +; CHECK-LABEL: sext_inc: +; CHECK: @ BB#0: +; CHECK-NEXT: rsb r0, r0, #1 +; CHECK-NEXT: mov pc, lr + %ext = sext i1 %x to i32 + %add = add i32 %ext, 1 + ret i32 %add +} + +define <4 x i32> @sext_inc_vec(<4 x i1> %x) { +; CHECK-LABEL: sext_inc_vec: +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov.i32 q9, #0x1f +; CHECK-NEXT: vmov.i32 q10, #0x1 +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vneg.s32 q9, q9 +; CHECK-NEXT: vshl.i32 q8, q8, #31 +; CHECK-NEXT: vshl.s32 q8, q8, q9 +; CHECK-NEXT: vadd.i32 q8, q8, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %ext = sext <4 x i1> %x to <4 x i32> + %add = add <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %add +} + diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index b1b3b46dce249..fc85a3a2e6834 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -102,6 +102,10 @@ ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp -enable-unsafe-fp-math 
-disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=CORTEX-M23 +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CORTEX-M33 +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M33-FAST +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4 | FileCheck %s --check-prefix=CORTEX-R4 ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4f | FileCheck %s --check-prefix=CORTEX-R4F ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 @@ -182,6 +186,8 @@ ; ARMv7a ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; ARMv7ve +; RUN: llc < %s -mtriple=armv7ve-none-linux-gnueabi | FileCheck %s --check-prefix=V7VE ; ARMv7r ; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN @@ -210,6 +216,12 @@ ; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 
-mattr=-neon,+fp-only-sp,+d16 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-SP ; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-NEON +; ARMv8-M +; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m23 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN + ; XSCALE: .eabi_attribute 6, 5 ; XSCALE: .eabi_attribute 8, 1 ; XSCALE: .eabi_attribute 9, 1 @@ -369,6 +381,22 @@ ; V7-FAST-NOT: .eabi_attribute 22 ; V7-FAST: .eabi_attribute 23, 1 +; V7VE: .syntax unified +; V7VE: .eabi_attribute 6, 10 @ Tag_CPU_arch +; V7VE: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile +; V7VE: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use +; V7VE: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use +; V7VE: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use +; V7VE: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal +; V7VE: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions +; V7VE: .eabi_attribute 23, 3 @ Tag_ABI_FP_number_model +; V7VE: .eabi_attribute 24, 1 @ Tag_ABI_align_needed +; V7VE: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved +; V7VE: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format +; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use +; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use +; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use + ; V8: .syntax unified ; V8: .eabi_attribute 67, "2.09" ; V8: .eabi_attribute 6, 14 @@ -1310,6 +1338,55 @@ ; CORTEX-A32-FAST-NOT: .eabi_attribute 22 ; CORTEX-A32-FAST: .eabi_attribute 23, 1 +; CORTEX-M23: .cpu cortex-m23 +; CORTEX-M23: .eabi_attribute 6, 16 +; CORTEX-M23: .eabi_attribute 7, 77 +; CORTEX-M23: .eabi_attribute 8, 0 +; 
CORTEX-M23: .eabi_attribute 9, 3 +; CORTEX-M23: .eabi_attribute 17, 1 +;; We default to IEEE 754 compliance +; CORTEX-M23-NOT: .eabi_attribute 19 +; CORTEX-M23: .eabi_attribute 20, 1 +; CORTEX-M23: .eabi_attribute 21, 1 +; CORTEX-M23: .eabi_attribute 23, 3 +; CORTEX-M23: .eabi_attribute 34, 1 +; CORTEX-M23: .eabi_attribute 24, 1 +; CORTEX-M23-NOT: .eabi_attribute 27 +; CORTEX-M23-NOT: .eabi_attribute 28 +; CORTEX-M23: .eabi_attribute 25, 1 +; CORTEX-M23: .eabi_attribute 38, 1 +; CORTEX-M23: .eabi_attribute 14, 0 +; CORTEX-M23-NOT: .eabi_attribute 44 + +; CORTEX-M33: .cpu cortex-m33 +; CORTEX-M33: .eabi_attribute 6, 17 +; CORTEX-M33: .eabi_attribute 7, 77 +; CORTEX-M33: .eabi_attribute 8, 0 +; CORTEX-M33: .eabi_attribute 9, 3 +; CORTEX-M33: .fpu fpv5-sp-d16 +; CORTEX-M33: .eabi_attribute 17, 1 +;; We default to IEEE 754 compliance +; CORTEX-M33-NOT: .eabi_attribute 19 +; CORTEX-M33: .eabi_attribute 20, 1 +; CORTEX-M33: .eabi_attribute 21, 1 +; CORTEX-M33: .eabi_attribute 23, 3 +; CORTEX-M33: .eabi_attribute 34, 1 +; CORTEX-M33: .eabi_attribute 24, 1 +; CORTEX-M33: .eabi_attribute 25, 1 +; CORTEX-M33: .eabi_attribute 27, 1 +; CORTEX-M33-NOT: .eabi_attribute 28 +; CORTEX-M33: .eabi_attribute 36, 1 +; CORTEX-M33: .eabi_attribute 38, 1 +; CORTEX-M33: .eabi_attribute 46, 1 +; CORTEX-M33-NOT: .eabi_attribute 44 +; CORTEX-M33: .eabi_attribute 14, 0 + +; CORTEX-M33-FAST-NOT: .eabi_attribute 19 +; CORTEX-M33-FAST: .eabi_attribute 20, 2 +; CORTEX-M33-FAST-NOT: .eabi_attribute 21 +; CORTEX-M33-FAST-NOT: .eabi_attribute 22 +; CORTEX-M33-FAST: .eabi_attribute 23, 1 + ; CORTEX-A35: .cpu cortex-a35 ; CORTEX-A35: .eabi_attribute 6, 14 ; CORTEX-A35: .eabi_attribute 7, 65 diff --git a/test/CodeGen/ARM/cmp1-peephole-thumb.mir b/test/CodeGen/ARM/cmp1-peephole-thumb.mir new file mode 100644 index 0000000000000..5ace58fd06584 --- /dev/null +++ b/test/CodeGen/ARM/cmp1-peephole-thumb.mir @@ -0,0 +1,78 @@ +# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s + +--- | + ; ModuleID = 
'<stdin>' + source_filename = "<stdin>" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumb-none--eabi" + + define i32 @f(i32 %a, i32 %b) { + entry: + %mul = mul nsw i32 %b, %a + %cmp = icmp eq i32 %mul, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv + } + +... +--- +name: f +# CHECK-LABEL: name: f +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: tgpr } + - { id: 2, class: tgpr } + - { id: 3, class: tgpr } + - { id: 4, class: tgpr } + - { id: 5, class: tgpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + +# CHECK: tMOVi8 1, 14, _ +# CHECK: tMOVi8 0, 14, _ +# CHECK: tMUL %1, %0, 14, _ +# CHECK-NOT: tCMPi8 +body: | + bb.0.entry: + successors: %bb.1.entry(0x40000000), %bb.2.entry(0x40000000) + liveins: %r0, %r1 + + %1 = COPY %r1 + %0 = COPY %r0 + %2, %cpsr = tMUL %1, %0, 14, _ + %3, %cpsr = tMOVi8 1, 14, _ + %4, %cpsr = tMOVi8 0, 14, _ + tCMPi8 killed %2, 0, 14, _, implicit-def %cpsr + tBcc %bb.2.entry, 0, %cpsr + + bb.1.entry: + successors: %bb.2.entry(0x80000000) + + + bb.2.entry: + %5 = PHI %4, %bb.1.entry, %3, %bb.0.entry + %r0 = COPY %5 + tBX_RET 14, _, implicit %r0 + +... 
diff --git a/test/CodeGen/ARM/cmp2-peephole-thumb.mir b/test/CodeGen/ARM/cmp2-peephole-thumb.mir new file mode 100644 index 0000000000000..6e9ca70f1741d --- /dev/null +++ b/test/CodeGen/ARM/cmp2-peephole-thumb.mir @@ -0,0 +1,108 @@ +# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s + +# Here we check that the peephole cmp rewrite is not triggered, because +# there is a store instruction between the tMUL and tCMP, i.e. there are +# no constants to reorder. + +--- | + ; ModuleID = 'cmp2-peephole-thumb.ll' + source_filename = "<stdin>" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumb-none--eabi" + + define i32 @g(i32 %a, i32 %b) { + entry: + %retval = alloca i32, align 4 + %mul = alloca i32, align 4 + %mul1 = mul nsw i32 %a, %b + store i32 %mul1, i32* %mul, align 4 + %0 = load i32, i32* %mul, align 4 + %cmp = icmp sle i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + + if.then: ; preds = %entry + store i32 42, i32* %retval, align 4 + br label %return + + if.end: ; preds = %entry + store i32 1, i32* %retval, align 4 + br label %return + + return: ; preds = %if.end, %if.then + %1 = load i32, i32* %retval, align 4 + ret i32 %1 + } + +...
+--- +name: g +# CHECK-LABEL: name: g +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: tgpr } + - { id: 2, class: tgpr } + - { id: 3, class: tgpr } + - { id: 4, class: tgpr } + - { id: 5, class: tgpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +stack: + - { id: 0, name: retval, offset: 0, size: 4, alignment: 4, local-offset: -4 } + - { id: 1, name: mul, offset: 0, size: 4, alignment: 4, local-offset: -8 } + +# CHECK: tMUL +# CHECK-NEXT: tSTRspi +# CHECK-NEXT: tCMPi8 +body: | + bb.0.entry: + successors: %bb.1.if.then(0x40000000), %bb.2.if.end(0x40000000) + liveins: %r0, %r1 + + %1 = COPY %r1 + %0 = COPY %r0 + %2, %cpsr = tMUL %0, %1, 14, _ + tSTRspi %2, %stack.1.mul, 0, 14, _ :: (store 4 into %ir.mul) + tCMPi8 %2, 0, 14, _, implicit-def %cpsr + tBcc %bb.2.if.end, 12, %cpsr + tB %bb.1.if.then, 14, _ + + bb.1.if.then: + successors: %bb.3.return(0x80000000) + + %4, %cpsr = tMOVi8 42, 14, _ + tSTRspi killed %4, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval) + tB %bb.3.return, 14, _ + + bb.2.if.end: + successors: %bb.3.return(0x80000000) + + %3, %cpsr = tMOVi8 1, 14, _ + tSTRspi killed %3, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval) + + bb.3.return: + %5 = tLDRspi %stack.0.retval, 0, 14, _ :: (dereferenceable load 4 from %ir.retval) + %r0 = COPY %5 + tBX_RET 14, _, implicit %r0 + +... 
diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll index 4038528c91bc8..0d5681aafbcb0 100644 --- a/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/test/CodeGen/ARM/cmpxchg-weak.ll @@ -13,14 +13,16 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) { ; CHECK-NEXT: dmb ish ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-NEXT: cmp [[SUCCESS]], #0 -; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: BB#2: -; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr ; CHECK-NEXT: [[LDFAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: str r3, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: [[SUCCESSBB]]: +; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr diff --git a/test/CodeGen/ARM/constantpool-promote.ll b/test/CodeGen/ARM/constantpool-promote.ll index fb1bdfd62fb7c..8df7e100c0514 100644 --- a/test/CodeGen/ARM/constantpool-promote.ll +++ b/test/CodeGen/ARM/constantpool-promote.ll @@ -1,10 +1,15 @@ -; RUN: llc -relocation-model=static < %s | FileCheck %s -; RUN: llc -relocation-model=pic < %s | FileCheck %s -; RUN: llc -relocation-model=ropi < %s | FileCheck %s -; RUN: llc -relocation-model=rwpi < %s | FileCheck %s - -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" -target triple = "armv7--linux-gnueabihf" +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=static < %s | FileCheck 
%s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M @.str = private unnamed_addr constant [2 x i8] c"s\00", align 1 @.str1 = private unnamed_addr constant [69 x i8] c"this string is far too long to fit in a literal pool by far and away\00", align 1 @@ -16,6 +21,7 @@ target triple = "armv7--linux-gnueabihf" @.arr3 = private unnamed_addr constant [2 x i16*] [i16* null, i16* null], align 4 @.ptr = private unnamed_addr constant [2 x i16*] [i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr2, i32 0, i32 0), i16* null], align 2 @.arr4 = private unnamed_addr constant [2 x i16] [i16 3, i16 4], align 16 +@.zerosize = private unnamed_addr constant [0 x i16] zeroinitializer, align 4 ; CHECK-LABEL: @test1 ; CHECK: adr r0, [[x:.*]] @@ -134,18 +140,56 @@ define void @test9() #0 { ret void } +; Ensure that zero sized values are supported / not promoted. 
+; CHECK-LABEL: @pr32130 +; CHECK-NOT: adr +define void @pr32130() #0 { + tail call void @c(i16* getelementptr inbounds ([0 x i16], [0 x i16]* @.zerosize, i32 0, i32 0)) #2 + ret void +} + +; CHECK-LABEL: @test10 +; CHECK-V6M: adr r{{[0-9]*}}, [[x:.*]] +; CHECK-V6M: [[x]]: +; CHECK-V6M: .asciz "s\000\000" +; CHECK-V7: ldrb{{(.w)?}} r{{[0-9]*}}, [[x:.*]] +; CHECK-V7: [[x]]: +; CHECK-V7: .asciz "s\000\000" +define void @test10(i8* %a) local_unnamed_addr #0 { + call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: @test11 +; CHECK-V6M: adr r{{[0-9]*}}, [[x:.*]] +; CHECK-V6M: [[x]]: +; CHECK-V6M: .short 3 +; CHECK-V6M: .short 4 +; CHECK-V7THUMB: ldrh{{(.w)?}} r{{[0-9]*}}, [[x:.*]] +; CHECK-V7THUMB: [[x]]: +; CHECK-V7THUMB: .short 3 +; CHECK-V7THUMB: .short 4 +; CHECK-V7ARM: adr r{{[0-9]*}}, [[x:.*]] +; CHECK-V7ARM: [[x]]: +; CHECK-V7ARM: .short 3 +; CHECK-V7ARM: .short 4 +define void @test11(i16* %a) local_unnamed_addr #0 { + call void @llvm.memmove.p0i16.p0i16.i32(i16* %a, i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i32 2, i1 false) + ret void +} + declare void @b(i8*) #1 declare void @c(i16*) #1 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) local_unnamed_addr +declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i32, i1) local_unnamed_addr attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" 
"unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind } !llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 1, !"min_enum_size", i32 4} -!2 = !{!"Apple LLVM version 6.1.0 (clang-602.0.53) (based on LLVM 3.6.0svn)"} diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index 2987b9a2105aa..197746c5f122b 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -3,8 +3,6 @@ ; Test dwarf reg no for s16 ;CHECK: super-register DW_OP_regx ;CHECK-NEXT: 264 -;CHECK-NEXT: DW_OP_piece -;CHECK-NEXT: 4 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-macosx10.6.7" diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index b31d1b7bed4f8..094b104997888 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -10,7 +10,7 @@ target triple = "thumbv7-apple-macosx10.6.7" ; CHECK: 0x00000000: Beginning address offset: ; CHECK-NEXT: Ending address offset: -; CHECK-NEXT: Location description: 90 {{.. .. .. .. $}} +; CHECK-NEXT: Location description: 90 {{.. .. 
$}} define void @_Z3foov() optsize ssp !dbg !1 { entry: diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll index 997f50760f3a9..8837315197554 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -10,12 +10,18 @@ ; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV ; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 | \ ; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-EABI +; RUN: llc < %s -mtriple=armv7ve-none-linux-gnu | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV +; RUN: llc < %s -mtriple=thumbv7ve-none-linux-gnu | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV \ +; RUN: -check-prefix=CHECK-THUMB define i32 @f1(i32 %a, i32 %b) { entry: ; CHECK-LABEL: f1 ; CHECK-SWDIV: __divsi3 +; CHECK-THUMB: .thumb_func ; CHECK-HWDIV: sdiv ; CHECK-EABI: __aeabi_idiv @@ -28,6 +34,7 @@ entry: ; CHECK-LABEL: f2 ; CHECK-SWDIV: __udivsi3 +; CHECK-THUMB: .thumb_func ; CHECK-HWDIV: udiv ; CHECK-EABI: __aeabi_uidiv @@ -40,6 +47,7 @@ entry: ; CHECK-LABEL: f3 ; CHECK-SWDIV: __modsi3 +; CHECK-THUMB: .thumb_func ; CHECK-HWDIV: sdiv ; CHECK-HWDIV: mls @@ -55,6 +63,7 @@ entry: ; CHECK-LABEL: f4 ; CHECK-SWDIV: __umodsi3 +; CHECK-THUMB: .thumb_func ; CHECK-HWDIV: udiv ; CHECK-HWDIV: mls diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll index 701884e926a89..71cd73a4a25d1 100644 --- a/test/CodeGen/ARM/fast-isel-align.ll +++ b/test/CodeGen/ARM/fast-isel-align.ll @@ -72,10 +72,10 @@ entry: %4 = fcmp une float %3, 0.000000e+00 ; ARM: ldr r[[R:[0-9]+]], [r0, #2] ; ARM: vmov s0, r[[R]] -; ARM: vcmpe.f32 s0, #0 +; ARM: vcmp.f32 s0, #0 ; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2] ; THUMB: vmov s0, r[[R]] -; THUMB: vcmpe.f32 s0, #0 +; THUMB: vcmp.f32 s0, #0 ret i1 %4 } diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll index a9d7e4580638e..543b6c285f3f7 100644 --- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll +++ 
b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -7,8 +7,8 @@ entry: ; ARM: t1a ; THUMB: t1a %cmp = fcmp oeq float %a, 0.000000e+00 -; ARM: vcmpe.f32 s{{[0-9]+}}, #0 -; THUMB: vcmpe.f32 s{{[0-9]+}}, #0 +; ARM: vcmp.f32 s{{[0-9]+}}, #0 +; THUMB: vcmp.f32 s{{[0-9]+}}, #0 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry @@ -28,9 +28,9 @@ entry: ; THUMB: t1b %cmp = fcmp oeq float %a, -0.000000e+00 ; ARM: vldr -; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}} +; ARM: vcmp.f32 s{{[0-9]+}}, s{{[0-9]+}} ; THUMB: vldr -; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}} +; THUMB: vcmp.f32 s{{[0-9]+}}, s{{[0-9]+}} br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry @@ -46,8 +46,8 @@ entry: ; ARM: t2a ; THUMB: t2a %cmp = fcmp oeq double %a, 0.000000e+00 -; ARM: vcmpe.f64 d{{[0-9]+}}, #0 -; THUMB: vcmpe.f64 d{{[0-9]+}}, #0 +; ARM: vcmp.f64 d{{[0-9]+}}, #0 +; THUMB: vcmp.f64 d{{[0-9]+}}, #0 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry @@ -65,9 +65,9 @@ entry: ; THUMB: t2b %cmp = fcmp oeq double %a, -0.000000e+00 ; ARM: vldr -; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}} +; ARM: vcmp.f64 d{{[0-9]+}}, d{{[0-9]+}} ; THUMB: vldr -; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}} +; THUMB: vcmp.f64 d{{[0-9]+}}, d{{[0-9]+}} br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll index 442459bc0582c..eb32ee54c0959 100644 --- a/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -135,7 +135,7 @@ define void @test_fold_point(i1 %tst) minsize { ; Important to check for beginning of basic block, because if it gets ; if-converted the test is probably no longer checking what it should. 
-; CHECK: {{LBB[0-9]+_2}}: +; CHECK: %end ; CHECK-NEXT: vpop {d7, d8} ; CHECK-NEXT: pop {r4, pc} diff --git a/test/CodeGen/ARM/fp-only-sp.ll b/test/CodeGen/ARM/fp-only-sp.ll new file mode 100644 index 0000000000000..2c7b2acbde9c5 --- /dev/null +++ b/test/CodeGen/ARM/fp-only-sp.ll @@ -0,0 +1,62 @@ +; RUN: llc -mtriple=thumbv7em-apple-macho -mcpu=cortex-m4 %s -o - -O0 | FileCheck %s +; RUN: llc -mtriple=thumbv7em-apple-macho -mcpu=cortex-m4 %s -o - | FileCheck %s + +; Note: vldr and vstr really do have 64-bit variants even with fp-only-sp +define void @test_load_store(double* %addr) { +; CHECK-LABEL: test_load_store: +; CHECK: vldr [[TMP:d[0-9]+]], [r0] +; CHECK: vstr [[TMP]], [r0] + %val = load volatile double, double* %addr + store volatile double %val, double* %addr + ret void +} + +define void @test_cmp(double %l, double %r, i1* %addr.dst) { +; CHECK-LABEL: test_cmp: +; CHECK: bl ___eqdf2 + %res = fcmp oeq double %l, %r + store i1 %res, i1* %addr.dst + ret void +} + +define void @test_ext(float %in, double* %addr) { +; CHECK-LABEL: test_ext: +; CHECK: bl ___extendsfdf2 + %res = fpext float %in to double + store double %res, double* %addr + ret void +} + +define void @test_trunc(double %in, float* %addr) { +; CHECK-LABEL: test_trunc: +; CHECK: bl ___truncdfsf2 + %res = fptrunc double %in to float + store float %res, float* %addr + ret void +} + +define void @test_itofp(i32 %in, double* %addr) { +; CHECK-LABEL: test_itofp: +; CHECK: bl ___floatsidf + %res = sitofp i32 %in to double + store double %res, double* %addr +; %res = fptoui double %tmp to i32 + ret void +} + +define i32 @test_fptoi(double* %addr) { +; CHECK-LABEL: test_fptoi: +; CHECK: bl ___fixunsdfsi + %val = load double, double* %addr + %res = fptoui double %val to i32 + ret i32 %res +} + +define void @test_binop(double* %addr) { +; CHECK-LABEL: test_binop: +; CHECK: bl ___adddf3 + %in = load double, double* %addr + %res = fadd double %in, %in + store double %res, double* %addr + ret void +} diff --git 
a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll index 8241236872873..9148ac109ae38 100644 --- a/test/CodeGen/ARM/fp16-promote.ll +++ b/test/CodeGen/ARM/fp16-promote.ll @@ -161,14 +161,14 @@ define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 { ret void } -; Test only two variants of fcmp. These get translated to f32 vcmpe +; Test only two variants of fcmp. These get translated to f32 vcmp ; instructions anyway. ; CHECK-ALL-LABEL: test_fcmp_une: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-VFP: vcmpe.f32 +; CHECK-VFP: vcmp.f32 ; CHECK-NOVFP: bl __aeabi_fcmpeq ; CHECK-FP16: vmrs APSR_nzcv, fpscr ; CHECK-ALL: movw{{ne|eq}} @@ -184,7 +184,7 @@ define i1 @test_fcmp_une(half* %p, half* %q) #0 { ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-VFP: vcmpe.f32 +; CHECK-VFP: vcmp.f32 ; CHECK-NOVFP: bl __aeabi_fcmpeq ; CHECK-FP16: vmrs APSR_nzcv, fpscr ; CHECK-LIBCALL: movw{{ne|eq}} @@ -597,7 +597,7 @@ define void @test_fma(half* %p, half* %q, half* %r) #0 { ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL-LABEL: test_fabs: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bfc +; CHECK-LIBCALL: bic ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fabs(half* %p) { %a = load half, half* %p, align 2 @@ -643,10 +643,11 @@ define void @test_maxnum(half* %p, half* %q) #0 { } ; CHECK-ALL-LABEL: test_minnan: -; CHECK-FP16: vcvtb.f32.f16 +; CHECK-FP16: vmov.f32 s0, #1.000000e+00 ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00 +; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 ; CHECK-VFP: vmin.f32 ; CHECK-NOVFP: bl __aeabi_fcmpge ; CHECK-FP16: vcvtb.f16.f32 @@ -660,10 +661,11 @@ define void @test_minnan(half* %p) #0 { } ; CHECK-ALL-LABEL: test_maxnan: +; CHECK-FP16: vmov.f32 s0, #1.000000e+00 ; CHECK-FP16: vcvtb.f32.f16 -; 
CHECK-FP16: vcvtb.f32.f16 -; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00 +; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 ; CHECK-VFP: vmax.f32 ; CHECK-NOVFP: bl __aeabi_fcmple ; CHECK-FP16: vcvtb.f16.f32 @@ -685,7 +687,7 @@ define void @test_maxnan(half* %p) #0 { ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-VFP-LIBCALL: vbsl -; CHECK-NOVFP: bfc +; CHECK-NOVFP: bic ; CHECK-NOVFP: and ; CHECK-NOVFP: orr ; CHECK-LIBCALL: bl __aeabi_f2h @@ -845,21 +847,15 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 { } ; CHECK-ALL-LABEL: test_extractelement: +; CHECK-VFP: push {{{.*}}, lr} ; CHECK-VFP: sub sp, sp, #8 -; CHECK-VFP: ldrh -; CHECK-VFP: ldrh -; CHECK-VFP: orr -; CHECK-VFP: str -; CHECK-VFP: ldrh -; CHECK-VFP: ldrh -; CHECK-VFP: orr -; CHECK-VFP: str +; CHECK-VFP: ldrd ; CHECK-VFP: mov ; CHECK-VFP: orr ; CHECK-VFP: ldrh ; CHECK-VFP: strh ; CHECK-VFP: add sp, sp, #8 -; CHECK-VFP: bx lr +; CHECK-VFP: pop {{{.*}}, pc} ; CHECK-NOVFP: ldrh ; CHECK-NOVFP: strh ; CHECK-NOVFP: ldrh diff --git a/test/CodeGen/ARM/fp16-v3.ll b/test/CodeGen/ARM/fp16-v3.ll index e26455e61e7f0..a37f71d9ba881 100644 --- a/test/CodeGen/ARM/fp16-v3.ll +++ b/test/CodeGen/ARM/fp16-v3.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7a--none-eabi" ; CHECK-LABEL: test_vec3: -; CHECK-DAG: vcvtb.f32.f16 [[SREG1:s[0-9]+]], +; CHECK-DAG: vmov.f32 [[SREG1:s[0-9]+]], #1.200000e+01 ; CHECK-DAG: vcvt.f32.s32 [[SREG2:s[0-9]+]], ; CHECK-DAG: vcvtb.f16.f32 [[SREG3:s[0-9]+]], [[SREG2]] ; CHECK-DAG: vcvtb.f32.f16 [[SREG4:s[0-9]+]], [[SREG3]] diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 45bb6d2f702d0..a828541094507 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -10,7 +10,7 @@ entry: ; CHECK-LABEL: t1: ; CHECK: vldr [[S0:s[0-9]+]], ; CHECK: vldr [[S1:s[0-9]+]], -; CHECK: 
vcmpe.f32 [[S1]], [[S0]] +; CHECK: vcmp.f32 [[S1]], [[S0]] ; CHECK: vmrs APSR_nzcv, fpscr ; CHECK: beq %0 = load float, float* %a @@ -35,10 +35,10 @@ entry: ; CHECK-NOT: vldr ; CHECK: ldrd [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], [r0] ; CHECK-NOT: b LBB -; CHECK: bfc [[REG2]], #31, #1 +; CHECK: bic [[REG2]], [[REG2]], #-2147483648 ; CHECK: cmp [[REG1]], #0 ; CHECK: cmpeq [[REG2]], #0 -; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vcmp.f32 ; CHECK-NOT: vmrs ; CHECK: bne %0 = load double, double* %a @@ -61,7 +61,7 @@ entry: ; CHECK: ldr [[REG3:(r[0-9]+)]], [r0] ; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648 ; CHECK: tst [[REG3]], [[REG4]] -; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vcmp.f32 ; CHECK-NOT: vmrs ; CHECK: bne %0 = load float, float* %a diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll index e3ffd45a396d8..67326e0001697 100644 --- a/test/CodeGen/ARM/fpcmp.ll +++ b/test/CodeGen/ARM/fpcmp.ll @@ -12,7 +12,7 @@ entry: define i32 @f2(float %a) { ;CHECK-LABEL: f2: -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 ;CHECK: moveq entry: %tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1] @@ -52,7 +52,7 @@ entry: define i32 @f6(float %a) { ;CHECK-LABEL: f6: -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 ;CHECK: movne entry: %tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1] diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll index c1696c9be1b7c..698c7506cc593 100644 --- a/test/CodeGen/ARM/fpcmp_ueq.ll +++ b/test/CodeGen/ARM/fpcmp_ueq.ll @@ -17,7 +17,7 @@ entry: ; CHECK-ARMv4: moveq r0, #42 ; CHECK-ARMv7-LABEL: f7: -; CHECK-ARMv7: vcmpe.f32 +; CHECK-ARMv7: vcmp.f32 ; CHECK-ARMv7: vmrs APSR_nzcv, fpscr ; CHECK-ARMv7: movweq ; CHECK-ARMv7-NOT: vmrs diff --git a/test/CodeGen/ARM/fpscr-intrinsics.ll b/test/CodeGen/ARM/fpscr-intrinsics.ll new file mode 100644 index 0000000000000..64b97525febfe --- /dev/null +++ b/test/CodeGen/ARM/fpscr-intrinsics.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -O0 -mtriple=armv7-eabi -mcpu=cortex-a8 -mattr=+neon,+fp-armv8 | FileCheck %s +; 
RUN: llc < %s -O3 -mtriple=armv7-eabi -mcpu=cortex-a8 -mattr=+neon,+fp-armv8 | FileCheck %s + +@a = common global double 0.000000e+00, align 8 + +; Function Attrs: noinline nounwind uwtable +define void @strtod() { +entry: + ; CHECK: vmrs r{{[0-9]+}}, fpscr + %0 = call i32 @llvm.flt.rounds() + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store double 5.000000e-01, double* @a, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @fn1(i32* nocapture %p) local_unnamed_addr { +entry: + ; CHECK: vmrs r{{[0-9]+}}, fpscr + %0 = tail call i32 @llvm.arm.get.fpscr() + store i32 %0, i32* %p, align 4 + ; CHECK: vmsr fpscr, r{{[0-9]+}} + tail call void @llvm.arm.set.fpscr(i32 1) + ; CHECK: vmrs r{{[0-9]+}}, fpscr + %1 = tail call i32 @llvm.arm.get.fpscr() + %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 1 + store i32 %1, i32* %arrayidx1, align 4 + ret void +} + +; Function Attrs: nounwind readonly +declare i32 @llvm.arm.get.fpscr() + +; Function Attrs: nounwind writeonly +declare void @llvm.arm.set.fpscr(i32) + +; Function Attrs: nounwind +declare i32 @llvm.flt.rounds() diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll index ef3e5a54a2dbe..797b147d5d016 100644 --- a/test/CodeGen/ARM/gpr-paired-spill.ll +++ b/test/CodeGen/ARM/gpr-paired-spill.ll @@ -16,22 +16,22 @@ define void @foo(i64* %addr) { ; an LDMIA was created with both a FrameIndex and an offset, which ; is not allowed. 
-; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] -; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] +; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] -; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] -; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] +; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] ; We also want to ensure the register scavenger is working (i.e. an ; offset from sp can be generated), so we need two spills. -; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} -; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} +; CHECK-WITHOUT-LDRD-DAG: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} ; In principle LLVM may have to recalculate the offset. At the moment ; it reuses the original though. 
-; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} store volatile i64 %val1, i64* %addr store volatile i64 %val2, i64* %addr diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 5725a404c3201..c7e18d35dbee1 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -10,8 +10,6 @@ entry: ; CHECK: vpop {d8} ; CHECK-NOT: vpopne ; CHECK: pop {r7, pc} -; CHECK: vpop {d8} -; CHECK: pop {r7, pc} br i1 undef, label %if.else, label %if.then if.then: ; preds = %entry diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll new file mode 100644 index 0000000000000..74117d3896bdc --- /dev/null +++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi | FileCheck %s -check-prefix=LE +; RUN: llc < %s -mtriple=armeb-eabi | FileCheck %s -check-prefix=BE + +define void @i24_or(i24* %a) { +; LE-LABEL: i24_or: +; LE: @ BB#0: +; LE-NEXT: ldrh r1, [r0] +; LE-NEXT: orr r1, r1, #384 +; LE-NEXT: strh r1, [r0] +; LE-NEXT: mov pc, lr +; +; BE-LABEL: i24_or: +; BE: @ BB#0: +; BE-NEXT: ldrh r1, [r0] +; BE-NEXT: ldrb r2, [r0, #2] +; BE-NEXT: orr r1, r2, r1, lsl #8 +; BE-NEXT: orr r1, r1, #384 +; BE-NEXT: strb r1, [r0, #2] +; BE-NEXT: lsr r1, r1, #8 +; BE-NEXT: strh r1, [r0] +; BE-NEXT: mov pc, lr + %aa = load i24, i24* %a, align 1 + %b = or i24 %aa, 384 + store i24 %b, i24* %a, align 1 + ret void +} + +define void @i24_and_or(i24* %a) { +; LE-LABEL: i24_and_or: +; LE: @ BB#0: +; LE-NEXT: ldrh r1, [r0] +; LE-NEXT: mov r2, #16256 +; LE-NEXT: orr r2, r2, #49152 +; LE-NEXT: orr r1, r1, #384 +; LE-NEXT: and r1, r1, r2 +; LE-NEXT: strh r1, [r0] +; LE-NEXT: mov pc, lr +; +; 
BE-LABEL: i24_and_or: +; BE: @ BB#0: +; BE-NEXT: mov r1, #128 +; BE-NEXT: strb r1, [r0, #2] +; BE-NEXT: ldrh r1, [r0] +; BE-NEXT: orr r1, r1, #1 +; BE-NEXT: strh r1, [r0] +; BE-NEXT: mov pc, lr + %b = load i24, i24* %a, align 1 + %c = and i24 %b, -128 + %d = or i24 %c, 384 + store i24 %d, i24* %a, align 1 + ret void +} + +define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { +; LE-LABEL: i24_insert_bit: +; LE: @ BB#0: +; LE-NEXT: ldrh r2, [r0] +; LE-NEXT: mov r3, #255 +; LE-NEXT: orr r3, r3, #57088 +; LE-NEXT: and r2, r2, r3 +; LE-NEXT: orr r1, r2, r1, lsl #13 +; LE-NEXT: strh r1, [r0] +; LE-NEXT: mov pc, lr +; +; BE-LABEL: i24_insert_bit: +; BE: @ BB#0: +; BE-NEXT: ldrh r2, [r0] +; BE-NEXT: mov r3, #57088 +; BE-NEXT: orr r3, r3, #16711680 +; BE-NEXT: and r2, r3, r2, lsl #8 +; BE-NEXT: orr r1, r2, r1, lsl #13 +; BE-NEXT: lsr r1, r1, #8 +; BE-NEXT: strh r1, [r0] +; BE-NEXT: mov pc, lr + %extbit = zext i1 %bit to i24 + %b = load i24, i24* %a, align 1 + %extbit.shl = shl nuw nsw i24 %extbit, 13 + %c = and i24 %b, -8193 + %d = or i24 %c, %extbit.shl + store i24 %d, i24* %a, align 1 + ret void +} + +define void @i56_or(i56* %a) { +; LE-LABEL: i56_or: +; LE: @ BB#0: +; LE-NEXT: ldr r1, [r0] +; LE-NEXT: orr r1, r1, #384 +; LE-NEXT: str r1, [r0] +; LE-NEXT: mov pc, lr +; +; BE-LABEL: i56_or: +; BE: @ BB#0: +; BE-NEXT: mov r1, r0 +; BE-NEXT: ldr r12, [r0] +; BE-NEXT: ldrh r2, [r1, #4]! 
+; BE-NEXT: ldrb r3, [r1, #2] +; BE-NEXT: orr r2, r3, r2, lsl #8 +; BE-NEXT: orr r2, r2, r12, lsl #24 +; BE-NEXT: orr r2, r2, #384 +; BE-NEXT: lsr r3, r2, #8 +; BE-NEXT: strb r2, [r1, #2] +; BE-NEXT: strh r3, [r1] +; BE-NEXT: bic r1, r12, #255 +; BE-NEXT: orr r1, r1, r2, lsr #24 +; BE-NEXT: str r1, [r0] +; BE-NEXT: mov pc, lr + %aa = load i56, i56* %a + %b = or i56 %aa, 384 + store i56 %b, i56* %a + ret void +} + +define void @i56_and_or(i56* %a) { +; LE-LABEL: i56_and_or: +; LE: @ BB#0: +; LE-NEXT: ldr r1, [r0] +; LE-NEXT: orr r1, r1, #384 +; LE-NEXT: bic r1, r1, #127 +; LE-NEXT: str r1, [r0] +; LE-NEXT: mov pc, lr +; +; BE-LABEL: i56_and_or: +; BE: @ BB#0: +; BE-NEXT: mov r1, r0 +; BE-NEXT: mov r3, #128 +; BE-NEXT: ldrh r2, [r1, #4]! +; BE-NEXT: strb r3, [r1, #2] +; BE-NEXT: lsl r2, r2, #8 +; BE-NEXT: ldr r12, [r0] +; BE-NEXT: orr r2, r2, r12, lsl #24 +; BE-NEXT: orr r2, r2, #384 +; BE-NEXT: lsr r3, r2, #8 +; BE-NEXT: strh r3, [r1] +; BE-NEXT: bic r1, r12, #255 +; BE-NEXT: orr r1, r1, r2, lsr #24 +; BE-NEXT: str r1, [r0] +; BE-NEXT: mov pc, lr + + %b = load i56, i56* %a, align 1 + %c = and i56 %b, -128 + %d = or i56 %c, 384 + store i56 %d, i56* %a, align 1 + ret void +} + +define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { +; LE-LABEL: i56_insert_bit: +; LE: @ BB#0: +; LE-NEXT: ldr r2, [r0] +; LE-NEXT: bic r2, r2, #8192 +; LE-NEXT: orr r1, r2, r1, lsl #13 +; LE-NEXT: str r1, [r0] +; LE-NEXT: mov pc, lr +; +; BE-LABEL: i56_insert_bit: +; BE: @ BB#0: +; BE-NEXT: .save {r11, lr} +; BE-NEXT: push {r11, lr} +; BE-NEXT: mov r2, r0 +; BE-NEXT: ldr lr, [r0] +; BE-NEXT: ldrh r12, [r2, #4]! 
+; BE-NEXT: ldrb r3, [r2, #2] +; BE-NEXT: orr r12, r3, r12, lsl #8 +; BE-NEXT: orr r3, r12, lr, lsl #24 +; BE-NEXT: bic r3, r3, #8192 +; BE-NEXT: orr r1, r3, r1, lsl #13 +; BE-NEXT: lsr r3, r1, #8 +; BE-NEXT: strh r3, [r2] +; BE-NEXT: bic r2, lr, #255 +; BE-NEXT: orr r1, r2, r1, lsr #24 +; BE-NEXT: str r1, [r0] +; BE-NEXT: pop {r11, lr} +; BE-NEXT: mov pc, lr + %extbit = zext i1 %bit to i56 + %b = load i56, i56* %a, align 1 + %extbit.shl = shl nuw nsw i56 %extbit, 13 + %c = and i56 %b, -8193 + %d = or i56 %c, %extbit.shl + store i56 %d, i56* %a, align 1 + ret void +} + diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index d15ef14b44932..90defad43a7d8 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -47,6 +47,7 @@ L3: ; preds = %L4, %bb2 br label %L2 L2: ; preds = %L3, %bb2 +; THUMB-LABEL: %L1.clone ; THUMB: muls %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1] %phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1] diff --git a/test/CodeGen/ARM/interval-update-remat.ll b/test/CodeGen/ARM/interval-update-remat.ll index 6391d4c29604f..524e8a0aa491a 100644 --- a/test/CodeGen/ARM/interval-update-remat.ll +++ b/test/CodeGen/ARM/interval-update-remat.ll @@ -109,7 +109,7 @@ _ZN7MessageD1Ev.exit: ; preds = %if.then.i.i.i.i, %i } ; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start(i64, i8* nocapture) #0 +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 declare %class.StaticSocketDataProvider.6.231.281.1306.2331* @_ZN24StaticSocketDataProviderC1EP13MockReadWritejS1_j(%class.StaticSocketDataProvider.6.231.281.1306.2331* returned, %struct.MockReadWrite.7.232.282.1307.2332*, i32, %struct.MockReadWrite.7.232.282.1307.2332*, i32) unnamed_addr @@ -130,7 +130,7 @@ declare %class.Message.13.238.288.1313.2338* @_ZN7MessageC1Ev(%class.Message.13. 
declare %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperD1Ev(%class.AssertHelper.10.235.285.1310.2335* returned) unnamed_addr ; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end(i64, i8* nocapture) #0 +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 declare void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIciiE13scoped_refptr15RequestPriorityN16ClientSocketPool13RespectLimitsERiT_11BoundNetLog(%class.ClientSocketHandle.14.239.289.1314.2339*, %class.Trans_NS___1_basic_string.18.243.293.1318.2343* dereferenceable(12), %class.scoped_refptr.19.244.294.1319.2344*, i32, i32, i32* dereferenceable(4), i32*, %class.BoundNetLog.20.245.295.1320.2345*) diff --git a/test/CodeGen/ARM/intrinsics-coprocessor.ll b/test/CodeGen/ARM/intrinsics-coprocessor.ll index 8fea49b39fb60..5352471238f9c 100644 --- a/test/CodeGen/ARM/intrinsics-coprocessor.ll +++ b/test/CodeGen/ARM/intrinsics-coprocessor.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 | FileCheck %s -; RUN: llc < %s -march=thumb -mtriple=thumbv7-eabi -mcpu=cortex-a8 | FileCheck %s define void @coproc(i8* %i) nounwind { entry: diff --git a/test/CodeGen/ARM/ldm-stm-i256.ll b/test/CodeGen/ARM/ldm-stm-i256.ll new file mode 100644 index 0000000000000..7b4151dabf6dd --- /dev/null +++ b/test/CodeGen/ARM/ldm-stm-i256.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple=armv7--eabi -verify-machineinstrs < %s | FileCheck %s + +; Check the way we schedule/merge a bunch of loads and stores. +; Originally test/CodeGen/ARM/2011-07-07-ScheduleDAGCrash.ll ; now +; being used as a test of optimizations related to ldm/stm. + +; FIXME: We could merge more loads/stores with regalloc hints. +; FIXME: Fix scheduling so we don't have 16 live registers. 
+ +define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp { +entry: + %c = load i256, i256* %cc + %d = load i256, i256* %dd + %add = add nsw i256 %c, %d + store i256 %add, i256* %a, align 8 + %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376 + %add6 = add nsw i256 %or, %d + store i256 %add6, i256* %b, align 8 + ret void + ; CHECK-DAG: ldm r3 + ; CHECK-DAG: ldm r2 + ; CHECK-DAG: ldr {{.*}}, [r3, #20] + ; CHECK-DAG: ldr {{.*}}, [r3, #16] + ; CHECK-DAG: ldr {{.*}}, [r3, #28] + ; CHECK-DAG: ldr {{.*}}, [r3, #24] + ; CHECK-DAG: ldr {{.*}}, [r2, #20] + ; CHECK-DAG: ldr {{.*}}, [r2, #16] + ; CHECK-DAG: ldr {{.*}}, [r2, #28] + ; CHECK-DAG: ldr {{.*}}, [r2, #24] + ; CHECK-DAG: stmib r0 + ; CHECK-DAG: str {{.*}}, [r0] + ; CHECK-DAG: str {{.*}}, [r0, #24] + ; CHECK-DAG: str {{.*}}, [r0, #28] + ; CHECK-DAG: str {{.*}}, [r1] + ; CHECK-DAG: stmib r1 + ; CHECK-DAG: str {{.*}}, [r1, #24] + ; CHECK-DAG: str {{.*}}, [r1, #28] +} diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 6a9e63f649c93..6981cfcb08550 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -80,7 +80,7 @@ return: ; preds = %bb, %entry ; CHECK-LABEL: Func1: define void @Func1() nounwind ssp "no-frame-pointer-elim"="true" { -entry: +entry: ; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} ; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}} ; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4] @@ -88,12 +88,12 @@ entry: ; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}} ; CONSERVATIVE-NOT: ldrd %orig_blocks = alloca [256 x i16], align 2 - %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start(i64 512, i8* %0) nounwind + %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind %tmp1 = load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 1), align 4 %tmp2 = 
load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 2), align 4 %add = add nsw i32 %tmp2, %tmp1 store i32 %add, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 0), align 4 - call void @llvm.lifetime.end(i64 512, i8* %0) nounwind + call void @llvm.lifetime.end.p0i8(i64 512, i8* %0) nounwind ret void } @@ -189,5 +189,23 @@ define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) "no-frame-pointer-e ret i32* %p1 } -declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind -declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind +; CHECK-LABEL: ldrd_strd_aa: +; NORMAL: ldrd [[TMP1:r[0-9]]], [[TMP2:r[0-9]]], +; NORMAL: strd [[TMP1]], [[TMP2]], +; CONSERVATIVE-NOT: ldrd +; CONSERVATIVE-NOT: strd +; CHECK: bx lr + +define void @ldrd_strd_aa(i32* noalias nocapture %x, i32* noalias nocapture readonly %y) { +entry: + %0 = load i32, i32* %y, align 4 + store i32 %0, i32* %x, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %y, i32 1 + %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 1 + store i32 %1, i32* %arrayidx3, align 4 + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind diff --git a/test/CodeGen/ARM/load-combine-big-endian.ll b/test/CodeGen/ARM/load-combine-big-endian.ll new file mode 100644 index 0000000000000..8d8a0136cf962 --- /dev/null +++ b/test/CodeGen/ARM/load-combine-big-endian.ll @@ -0,0 +1,779 @@ +; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s +; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6 + +; i8* p; // p is 4 byte aligned +; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3] +define i32 @load_i32_by_i8_big_endian(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_big_endian: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: 
load_i32_by_i8_big_endian: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 4 + %tmp2 = zext i8 %tmp1 to i32 + %tmp3 = shl nuw nsw i32 %tmp2, 24 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 8 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = or i32 %tmp13, %tmp16 + ret i32 %tmp17 +} + +; i8* p; // p is 4 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24) +define i32 @load_i32_by_i8_bswap(i32* %arg) { +; BSWAP is not supported by 32 bit target +; CHECK-LABEL: load_i32_by_i8_bswap: +; CHECK: ldr r0, [r0] +; CHECK: and +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp, align 4 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + 
%tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p is 4 byte aligned +; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3]) +define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) { +; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 4 + %tmp2 = zext i8 %tmp1 to i16 + %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp4 = load i8, i8* %tmp3, align 1 + %tmp5 = zext i8 %tmp4 to i16 + %tmp6 = shl nuw nsw i16 %tmp2, 8 + %tmp7 = or i16 %tmp6, %tmp5 + %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp9 = load i8, i8* %tmp8, align 1 + %tmp10 = zext i8 %tmp9 to i16 + %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp12 = load i8, i8* %tmp11, align 1 + %tmp13 = zext i8 %tmp12 to i16 + %tmp14 = shl nuw nsw i16 %tmp10, 8 + %tmp15 = or i16 %tmp14, %tmp13 + %tmp16 = zext i16 %tmp7 to i32 + %tmp17 = zext i16 %tmp15 to i32 + %tmp18 = shl nuw nsw i32 %tmp16, 16 + %tmp19 = or i32 %tmp18, %tmp17 + ret i32 %tmp19 +} + +; i16* p; // p is 4 byte aligned +; ((i32) p[0] << 16) | (i32) p[1] +define i32 @load_i32_by_i16(i32* %arg) { +; CHECK-LABEL: load_i32_by_i16: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i16: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i16* + %tmp1 = load i16, i16* %tmp, align 4 + %tmp2 = zext i16 %tmp1 to i32 + %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 + %tmp4 = load i16, i16* %tmp3, align 1 + %tmp5 = zext i16 %tmp4 to i32 + %tmp6 = shl nuw nsw i32 %tmp2, 16 + %tmp7 = or i32 %tmp6, %tmp5 + ret i32 %tmp7 +} + +; i16* p_16; // p_16 is 4 byte aligned +; i8* p_8 = (i8*) p_16; +; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3] +define i32 @load_i32_by_i16_i8(i32* %arg) { +; 
CHECK-LABEL: load_i32_by_i16_i8: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i16_i8: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i16* + %tmp1 = bitcast i32* %arg to i8* + %tmp2 = load i16, i16* %tmp, align 4 + %tmp3 = zext i16 %tmp2 to i32 + %tmp4 = shl nuw nsw i32 %tmp3, 16 + %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2 + %tmp6 = load i8, i8* %tmp5, align 1 + %tmp7 = zext i8 %tmp6 to i32 + %tmp8 = shl nuw nsw i32 %tmp7, 8 + %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = or i32 %tmp8, %tmp11 + %tmp13 = or i32 %tmp12, %tmp4 + ret i32 %tmp13 +} + +; i8* p; // p is 8 byte aligned +; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56) +define i64 @load_i64_by_i8_bswap(i64* %arg) { +; CHECK-LABEL: load_i64_by_i8_bswap: +; CHECK: ldr{{.*}}r0 +; CHECK: ldr{{.*}}r0 +; CHECK: and +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap: +; CHECK-ARMv6: ldrd r2, r3, [r0] +; CHECK-ARMv6: rev r0, r3 +; CHECK-ARMv6: rev r1, r2 +; CHECK-ARMv6: bx lr + %tmp = bitcast i64* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 8 + %tmp2 = zext i8 %tmp1 to i64 + %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1 + %tmp4 = load i8, i8* %tmp3, align 1 + %tmp5 = zext i8 %tmp4 to i64 + %tmp6 = shl nuw nsw i64 %tmp5, 8 + %tmp7 = or i64 %tmp6, %tmp2 + %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2 + %tmp9 = load i8, i8* %tmp8, align 1 + %tmp10 = zext i8 %tmp9 to i64 + %tmp11 = shl nuw nsw i64 %tmp10, 16 + %tmp12 = or i64 %tmp7, %tmp11 + %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3 + %tmp14 = load i8, i8* %tmp13, align 1 + %tmp15 = 
zext i8 %tmp14 to i64 + %tmp16 = shl nuw nsw i64 %tmp15, 24 + %tmp17 = or i64 %tmp12, %tmp16 + %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4 + %tmp19 = load i8, i8* %tmp18, align 1 + %tmp20 = zext i8 %tmp19 to i64 + %tmp21 = shl nuw nsw i64 %tmp20, 32 + %tmp22 = or i64 %tmp17, %tmp21 + %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5 + %tmp24 = load i8, i8* %tmp23, align 1 + %tmp25 = zext i8 %tmp24 to i64 + %tmp26 = shl nuw nsw i64 %tmp25, 40 + %tmp27 = or i64 %tmp22, %tmp26 + %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6 + %tmp29 = load i8, i8* %tmp28, align 1 + %tmp30 = zext i8 %tmp29 to i64 + %tmp31 = shl nuw nsw i64 %tmp30, 48 + %tmp32 = or i64 %tmp27, %tmp31 + %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7 + %tmp34 = load i8, i8* %tmp33, align 1 + %tmp35 = zext i8 %tmp34 to i64 + %tmp36 = shl nuw i64 %tmp35, 56 + %tmp37 = or i64 %tmp32, %tmp36 + ret i64 %tmp37 +} + +; i8* p; // p is 8 byte aligned +; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7] +define i64 @load_i64_by_i8(i64* %arg) { +; CHECK-LABEL: load_i64_by_i8: +; CHECK: ldr r2, [r0] +; CHECK: ldr r1, [r0, #4] +; CHECK: mov r0, r2 +; CHECK: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i64_by_i8: +; CHECK-ARMv6: ldrd r0, r1, [r0] +; CHECK-ARMv6: bx lr + %tmp = bitcast i64* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 8 + %tmp2 = zext i8 %tmp1 to i64 + %tmp3 = shl nuw i64 %tmp2, 56 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i64 + %tmp7 = shl nuw nsw i64 %tmp6, 48 + %tmp8 = or i64 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i64 + %tmp12 = shl nuw nsw i64 %tmp11, 40 + %tmp13 = or i64 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to 
i64 + %tmp17 = shl nuw nsw i64 %tmp16, 32 + %tmp18 = or i64 %tmp13, %tmp17 + %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4 + %tmp20 = load i8, i8* %tmp19, align 1 + %tmp21 = zext i8 %tmp20 to i64 + %tmp22 = shl nuw nsw i64 %tmp21, 24 + %tmp23 = or i64 %tmp18, %tmp22 + %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5 + %tmp25 = load i8, i8* %tmp24, align 1 + %tmp26 = zext i8 %tmp25 to i64 + %tmp27 = shl nuw nsw i64 %tmp26, 16 + %tmp28 = or i64 %tmp23, %tmp27 + %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6 + %tmp30 = load i8, i8* %tmp29, align 1 + %tmp31 = zext i8 %tmp30 to i64 + %tmp32 = shl nuw nsw i64 %tmp31, 8 + %tmp33 = or i64 %tmp28, %tmp32 + %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7 + %tmp35 = load i8, i8* %tmp34, align 1 + %tmp36 = zext i8 %tmp35 to i64 + %tmp37 = or i64 %tmp33, %tmp36 + ret i64 %tmp37 +} + +; i8* p; // p[1] is 4 byte aligned +; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24) +define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_nonzero_offset: +; CHECK: ldr r0, [r0, #1] +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset: +; CHECK-ARMv6: ldr r0, [r0, #1] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 4 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl 
nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p[-4] is 4 byte aligned +; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24) +define i32 @load_i32_by_i8_neg_offset(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_neg_offset: +; CHECK: ldr r0, [r0, #-4] +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset: +; CHECK-ARMv6: ldr r0, [r0, #-4] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4 + %tmp2 = load i8, i8* %tmp1, align 4 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p[1] is 4 byte aligned +; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24) +define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap: +; CHECK: ldr r0, [r0, #1] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: 
load_i32_by_i8_nonzero_offset_bswap: +; CHECK-ARMv6: ldr r0, [r0, #1] +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp15 = load i8, i8* %tmp14, align 4 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p[-4] is 4 byte aligned +; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24) +define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap: +; CHECK: ldr r0, [r0, #-4] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap: +; CHECK-ARMv6: ldr r0, [r0, #-4] +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4 + %tmp15 = load i8, i8* %tmp14, align 4 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + 
ret i32 %tmp18 +} + +declare i16 @llvm.bswap.i16(i16) + +; i16* p; // p is 4 byte aligned +; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16) +define i32 @load_i32_by_bswap_i16(i32* %arg) { +; CHECK-LABEL: load_i32_by_bswap_i16: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i16* + %tmp1 = load i16, i16* %tmp, align 4 + %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1) + %tmp2 = zext i16 %tmp11 to i32 + %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 + %tmp4 = load i16, i16* %tmp3, align 1 + %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4) + %tmp5 = zext i16 %tmp41 to i32 + %tmp6 = shl nuw nsw i32 %tmp5, 16 + %tmp7 = or i32 %tmp6, %tmp2 + ret i32 %tmp7 +} + +; i16* p; // p is 4 byte aligned +; (i32) p[1] | ((sext(p[0]) to i32) << 16) +define i32 @load_i32_by_sext_i16(i32* %arg) { +; CHECK-LABEL: load_i32_by_sext_i16: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i32_by_sext_i16: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i16* + %tmp1 = load i16, i16* %tmp, align 4 + %tmp2 = sext i16 %tmp1 to i32 + %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 + %tmp4 = load i16, i16* %tmp3, align 1 + %tmp5 = zext i16 %tmp4 to i32 + %tmp6 = shl nuw nsw i32 %tmp2, 16 + %tmp7 = or i32 %tmp6, %tmp5 + ret i32 %tmp7 +} + +; i8* arg; i32 i; +; p = arg + 12; +; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24) +define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) { +; CHECK-LABEL: load_i32_by_i8_base_offset_index: +; CHECK: add r0, r0, r1 +; CHECK-NEXT: mov r1, 
#65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: ldr r0, [r0, #12] +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index: +; CHECK-ARMv6: add r0, r0, r1 +; CHECK-ARMv6-NEXT: ldr r0, [r0, #12] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + %tmp = add nuw nsw i32 %i, 3 + %tmp2 = add nuw nsw i32 %i, 2 + %tmp3 = add nuw nsw i32 %i, 1 + %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 + %tmp5 = zext i32 %i to i64 + %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5 + %tmp7 = load i8, i8* %tmp6, align 4 + %tmp8 = zext i8 %tmp7 to i32 + %tmp9 = zext i32 %tmp3 to i64 + %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9 + %tmp11 = load i8, i8* %tmp10, align 1 + %tmp12 = zext i8 %tmp11 to i32 + %tmp13 = shl nuw nsw i32 %tmp12, 8 + %tmp14 = or i32 %tmp13, %tmp8 + %tmp15 = zext i32 %tmp2 to i64 + %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15 + %tmp17 = load i8, i8* %tmp16, align 1 + %tmp18 = zext i8 %tmp17 to i32 + %tmp19 = shl nuw nsw i32 %tmp18, 16 + %tmp20 = or i32 %tmp14, %tmp19 + %tmp21 = zext i32 %tmp to i64 + %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21 + %tmp23 = load i8, i8* %tmp22, align 1 + %tmp24 = zext i8 %tmp23 to i32 + %tmp25 = shl nuw i32 %tmp24, 24 + %tmp26 = or i32 %tmp20, %tmp25 + ret i32 %tmp26 +} + +; i8* arg; i32 i; +; p = arg + 12; +; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24) +define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { +; CHECK-LABEL: load_i32_by_i8_base_offset_index_2: +; CHECK: add r0, r0, r1 +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: ldr r0, [r0, #13] +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: 
orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2: +; CHECK-ARMv6: add r0, r0, r1 +; CHECK-ARMv6-NEXT: ldr r0, [r0, #13] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = add nuw nsw i32 %i, 4 + %tmp2 = add nuw nsw i32 %i, 3 + %tmp3 = add nuw nsw i32 %i, 2 + %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 + %tmp5 = add nuw nsw i32 %i, 1 + %tmp27 = zext i32 %tmp5 to i64 + %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27 + %tmp29 = load i8, i8* %tmp28, align 4 + %tmp30 = zext i8 %tmp29 to i32 + %tmp31 = zext i32 %tmp3 to i64 + %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31 + %tmp33 = load i8, i8* %tmp32, align 1 + %tmp34 = zext i8 %tmp33 to i32 + %tmp35 = shl nuw nsw i32 %tmp34, 8 + %tmp36 = or i32 %tmp35, %tmp30 + %tmp37 = zext i32 %tmp2 to i64 + %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37 + %tmp39 = load i8, i8* %tmp38, align 1 + %tmp40 = zext i8 %tmp39 to i32 + %tmp41 = shl nuw nsw i32 %tmp40, 16 + %tmp42 = or i32 %tmp36, %tmp41 + %tmp43 = zext i32 %tmp to i64 + %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43 + %tmp45 = load i8, i8* %tmp44, align 1 + %tmp46 = zext i8 %tmp45 to i32 + %tmp47 = shl nuw i32 %tmp46, 24 + %tmp48 = or i32 %tmp42, %tmp47 + ret i32 %tmp48 +} + +; i8* p; // p is 2 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, 
i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #16 +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #16 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #24 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 
+ %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #16 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 
@zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #24 +; CHECK-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #24 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; +; i16* p1.i16 = (i16*) p; +; (p1.i16[0] << 8) | ((i16) p[2]) +; +; This is essentialy a i16 load from p[1], but we don't fold the pattern now +; because in the original DAG we don't have p[1] address available +define i16 @load_i16_from_nonzero_offset(i8* %p) { +; CHECK-LABEL: load_i16_from_nonzero_offset: +; CHECK: ldrh r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #2] +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset: +; CHECK-ARMv6: ldrh r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #2] +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %p1.i16 = bitcast i8* %p to i16* + %p2.i8 = getelementptr i8, i8* %p, i64 2 + %v1 = load i16, i16* %p1.i16 + %v2.i8 = load i8, i8* %p2.i8 + %v2 = zext i8 %v2.i8 to i16 + %v1.shl = shl i16 %v1, 8 + %res = or i16 %v1.shl, %v2 + ret i16 %res +} diff --git a/test/CodeGen/ARM/load-combine.ll b/test/CodeGen/ARM/load-combine.ll new file mode 100644 index 0000000000000..720bc7b88b32f --- /dev/null +++ b/test/CodeGen/ARM/load-combine.ll @@ -0,0 +1,692 @@ +; RUN: llc < %s 
-mtriple=arm-unknown | FileCheck %s +; RUN: llc < %s -mtriple=armv6-unknown | FileCheck %s --check-prefix=CHECK-ARMv6 + +; i8* p; // p is 1 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24) +define i32 @load_i32_by_i8_unaligned(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_unaligned: +; CHECK: ldrb{{.*}}r0 +; CHECK: ldrb{{.*}}r0 +; CHECK: ldrb{{.*}}r0 +; CHECK: ldrb{{.*}}r0 +; CHECK: orr +; CHECK: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_unaligned: +; CHECK-ARMv6: ldrb{{.*}}r0 +; CHECK-ARMv6: ldrb{{.*}}r0 +; CHECK-ARMv6: ldrb{{.*}}r0 +; CHECK-ARMv6: ldrb{{.*}}r0 +; CHECK-ARMv6: orr +; CHECK-ARMv6: bx lr + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p is 4 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24) +define i32 @load_i32_by_i8_aligned(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_aligned: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_aligned: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp, align 4 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* 
%tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p is 4 byte aligned +; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3] +define i32 @load_i32_by_i8_bswap(i32* %arg) { +; BSWAP is not supported by 32 bit target +; CHECK-LABEL: load_i32_by_i8_bswap: +; CHECK: ldr r0, [r0] +; CHECK: and +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 4 + %tmp2 = zext i8 %tmp1 to i32 + %tmp3 = shl nuw nsw i32 %tmp2, 24 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 8 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = or i32 %tmp13, %tmp16 + ret i32 %tmp17 +} + +; i8* p; // p is 8 byte aligned +; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56) +define i64 @load_i64_by_i8(i64* %arg) { +; CHECK-LABEL: load_i64_by_i8: 
+; CHECK: ldr r2, [r0] +; CHECK-NEXT: ldr r1, [r0, #4] +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i64_by_i8: +; CHECK-ARMv6: ldrd r0, r1, [r0] +; CHECK-ARMv6: bx lr + %tmp = bitcast i64* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 8 + %tmp2 = zext i8 %tmp1 to i64 + %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1 + %tmp4 = load i8, i8* %tmp3, align 1 + %tmp5 = zext i8 %tmp4 to i64 + %tmp6 = shl nuw nsw i64 %tmp5, 8 + %tmp7 = or i64 %tmp6, %tmp2 + %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2 + %tmp9 = load i8, i8* %tmp8, align 1 + %tmp10 = zext i8 %tmp9 to i64 + %tmp11 = shl nuw nsw i64 %tmp10, 16 + %tmp12 = or i64 %tmp7, %tmp11 + %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3 + %tmp14 = load i8, i8* %tmp13, align 1 + %tmp15 = zext i8 %tmp14 to i64 + %tmp16 = shl nuw nsw i64 %tmp15, 24 + %tmp17 = or i64 %tmp12, %tmp16 + %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4 + %tmp19 = load i8, i8* %tmp18, align 1 + %tmp20 = zext i8 %tmp19 to i64 + %tmp21 = shl nuw nsw i64 %tmp20, 32 + %tmp22 = or i64 %tmp17, %tmp21 + %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5 + %tmp24 = load i8, i8* %tmp23, align 1 + %tmp25 = zext i8 %tmp24 to i64 + %tmp26 = shl nuw nsw i64 %tmp25, 40 + %tmp27 = or i64 %tmp22, %tmp26 + %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6 + %tmp29 = load i8, i8* %tmp28, align 1 + %tmp30 = zext i8 %tmp29 to i64 + %tmp31 = shl nuw nsw i64 %tmp30, 48 + %tmp32 = or i64 %tmp27, %tmp31 + %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7 + %tmp34 = load i8, i8* %tmp33, align 1 + %tmp35 = zext i8 %tmp34 to i64 + %tmp36 = shl nuw i64 %tmp35, 56 + %tmp37 = or i64 %tmp32, %tmp36 + ret i64 %tmp37 +} + +; i8* p; // p is 8 byte aligned +; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7] +define i64 @load_i64_by_i8_bswap(i64* %arg) { +; CHECK-LABEL: load_i64_by_i8_bswap: +; CHECK: ldr{{.*}}r0 +; 
CHECK: ldr{{.*}}r0 +; CHECK: and +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: and +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK-NEXT: orr +; CHECK: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap: +; CHECK-ARMv6: ldrd r2, r3, [r0] +; CHECK-ARMv6: rev r0, r3 +; CHECK-ARMv6: rev r1, r2 +; CHECK-ARMv6: bx lr + %tmp = bitcast i64* %arg to i8* + %tmp1 = load i8, i8* %tmp, align 8 + %tmp2 = zext i8 %tmp1 to i64 + %tmp3 = shl nuw i64 %tmp2, 56 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i64 + %tmp7 = shl nuw nsw i64 %tmp6, 48 + %tmp8 = or i64 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i64 + %tmp12 = shl nuw nsw i64 %tmp11, 40 + %tmp13 = or i64 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i64 + %tmp17 = shl nuw nsw i64 %tmp16, 32 + %tmp18 = or i64 %tmp13, %tmp17 + %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4 + %tmp20 = load i8, i8* %tmp19, align 1 + %tmp21 = zext i8 %tmp20 to i64 + %tmp22 = shl nuw nsw i64 %tmp21, 24 + %tmp23 = or i64 %tmp18, %tmp22 + %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5 + %tmp25 = load i8, i8* %tmp24, align 1 + %tmp26 = zext i8 %tmp25 to i64 + %tmp27 = shl nuw nsw i64 %tmp26, 16 + %tmp28 = or i64 %tmp23, %tmp27 + %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6 + %tmp30 = load i8, i8* %tmp29, align 1 + %tmp31 = zext i8 %tmp30 to i64 + %tmp32 = shl nuw nsw i64 %tmp31, 8 + %tmp33 = or i64 %tmp28, %tmp32 + %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7 + %tmp35 = load i8, i8* %tmp34, align 1 + %tmp36 = zext i8 %tmp35 to i64 + %tmp37 = or i64 %tmp33, %tmp36 + ret i64 %tmp37 +} + +; i8* p; // p[1] is 4 byte aligned +; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24) +define i32 
@load_i32_by_i8_nonzero_offset(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_nonzero_offset: +; CHECK: ldr r0, [r0, #1] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset: +; CHECK-ARMv6: ldr r0, [r0, #1] +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 4 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4 + %tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p[-4] is 4 byte aligned +; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24) +define i32 @load_i32_by_i8_neg_offset(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_neg_offset: +; CHECK: ldr r0, [r0, #-4] +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset: +; CHECK-ARMv6: ldr r0, [r0, #-4] +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4 + %tmp2 = load i8, i8* %tmp1, align 4 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1 + 
%tmp15 = load i8, i8* %tmp14, align 1 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p[1] is 4 byte aligned +; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24) +define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap: +; CHECK: ldr r0, [r0, #1] +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap: +; CHECK-ARMv6: ldr r0, [r0, #1] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp15 = load i8, i8* %tmp14, align 4 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +; i8* p; // p[-4] is 4 byte aligned +; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24) +define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { +; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap: +; CHECK: ldr r0, [r0, #-4] +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; 
CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap: +; CHECK-ARMv6: ldr r0, [r0, #-4] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3 + %tmp10 = load i8, i8* %tmp9, align 1 + %tmp11 = zext i8 %tmp10 to i32 + %tmp12 = shl nuw nsw i32 %tmp11, 16 + %tmp13 = or i32 %tmp8, %tmp12 + %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4 + %tmp15 = load i8, i8* %tmp14, align 4 + %tmp16 = zext i8 %tmp15 to i32 + %tmp17 = shl nuw nsw i32 %tmp16, 24 + %tmp18 = or i32 %tmp13, %tmp17 + ret i32 %tmp18 +} + +declare i16 @llvm.bswap.i16(i16) + +; i16* p; // p is 4 byte aligned +; (i32) bswap(p[1]) | (i32) bswap(p[0] << 16) +define i32 @load_i32_by_bswap_i16(i32* %arg) { +; CHECK-LABEL: load_i32_by_bswap_i16: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: and r2, r2, r0, lsl #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: mov pc, lr + +; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: rev r0, r0 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i16* + %tmp1 = load i16, i16* %tmp, align 4 + %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1) + %tmp2 = zext i16 %tmp11 to i32 + %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 + %tmp4 = load i16, i16* %tmp3, align 1 + %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4) + %tmp5 = zext i16 
%tmp41 to i32 + %tmp6 = shl nuw nsw i32 %tmp2, 16 + %tmp7 = or i32 %tmp6, %tmp5 + ret i32 %tmp7 +} + +; i16* p; +; (i32) p[0] | (sext(p[1] << 16) to i32) +define i32 @load_i32_by_sext_i16(i32* %arg) { +; CHECK-LABEL: load_i32_by_sext_i16: +; CHECK: ldr r0, [r0] +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i32_by_sext_i16: +; CHECK-ARMv6: ldr r0, [r0] +; CHECK-ARMv6-NEXT: bx lr + %tmp = bitcast i32* %arg to i16* + %tmp1 = load i16, i16* %tmp, align 4 + %tmp2 = zext i16 %tmp1 to i32 + %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 + %tmp4 = load i16, i16* %tmp3, align 1 + %tmp5 = sext i16 %tmp4 to i32 + %tmp6 = shl nuw nsw i32 %tmp5, 16 + %tmp7 = or i32 %tmp6, %tmp2 + ret i32 %tmp7 +} + +; i8* arg; i32 i; +; p = arg + 12; +; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24) +define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) { +; CHECK-LABEL: load_i32_by_i8_base_offset_index: +; CHECK: add r0, r0, r1 +; CHECK-NEXT: ldr r0, [r0, #12] +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index: +; CHECK-ARMv6: add r0, r0, r1 +; CHECK-ARMv6-NEXT: ldr r0, [r0, #12] +; CHECK-ARMv6-NEXT: bx lr + + %tmp = add nuw nsw i32 %i, 3 + %tmp2 = add nuw nsw i32 %i, 2 + %tmp3 = add nuw nsw i32 %i, 1 + %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 + %tmp5 = zext i32 %i to i64 + %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5 + %tmp7 = load i8, i8* %tmp6, align 4 + %tmp8 = zext i8 %tmp7 to i32 + %tmp9 = zext i32 %tmp3 to i64 + %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9 + %tmp11 = load i8, i8* %tmp10, align 1 + %tmp12 = zext i8 %tmp11 to i32 + %tmp13 = shl nuw nsw i32 %tmp12, 8 + %tmp14 = or i32 %tmp13, %tmp8 + %tmp15 = zext i32 %tmp2 to i64 + %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15 + %tmp17 = load i8, i8* %tmp16, align 1 + %tmp18 = zext i8 %tmp17 to i32 + %tmp19 = shl nuw nsw i32 %tmp18, 16 + %tmp20 = or i32 %tmp14, %tmp19 + %tmp21 = zext i32 %tmp to i64 
+ %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21 + %tmp23 = load i8, i8* %tmp22, align 1 + %tmp24 = zext i8 %tmp23 to i32 + %tmp25 = shl nuw i32 %tmp24, 24 + %tmp26 = or i32 %tmp20, %tmp25 + ret i32 %tmp26 +} + +; i8* arg; i32 i; +; p = arg + 12; +; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24) +define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { +; CHECK-LABEL: load_i32_by_i8_base_offset_index_2: +; CHECK: add r0, r0, r1 +; CHECK-NEXT: ldr r0, [r0, #13] +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2: +; CHECK-ARMv6: add r0, r0, r1 +; CHECK-ARMv6-NEXT: ldr r0, [r0, #13] +; CHECK-ARMv6-NEXT: bx lr + %tmp = add nuw nsw i32 %i, 4 + %tmp2 = add nuw nsw i32 %i, 3 + %tmp3 = add nuw nsw i32 %i, 2 + %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 + %tmp5 = add nuw nsw i32 %i, 1 + %tmp27 = zext i32 %tmp5 to i64 + %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27 + %tmp29 = load i8, i8* %tmp28, align 4 + %tmp30 = zext i8 %tmp29 to i32 + %tmp31 = zext i32 %tmp3 to i64 + %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31 + %tmp33 = load i8, i8* %tmp32, align 1 + %tmp34 = zext i8 %tmp33 to i32 + %tmp35 = shl nuw nsw i32 %tmp34, 8 + %tmp36 = or i32 %tmp35, %tmp30 + %tmp37 = zext i32 %tmp2 to i64 + %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37 + %tmp39 = load i8, i8* %tmp38, align 1 + %tmp40 = zext i8 %tmp39 to i32 + %tmp41 = shl nuw nsw i32 %tmp40, 16 + %tmp42 = or i32 %tmp36, %tmp41 + %tmp43 = zext i32 %tmp to i64 + %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43 + %tmp45 = load i8, i8* %tmp44, align 1 + %tmp46 = zext i8 %tmp45 to i32 + %tmp47 = shl nuw i32 %tmp46, 24 + %tmp48 = or i32 %tmp42, %tmp47 + ret i32 %tmp48 +} + +; i8* p; // p is 2 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; 
CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #16 +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #16 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: 
ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #24 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #16 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = 
getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #24 +; CHECK-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #24 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll index 80cb5096c03c5..9ecda8b06cbf2 100644 --- a/test/CodeGen/ARM/longMAC.ll +++ b/test/CodeGen/ARM/longMAC.ll @@ -1,14 +1,15 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE -; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7-LE +; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-V7-LE ; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE -; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE -; 
RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB -; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB2 -; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB -; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB-BE -; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6M-THUMB -; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7M-THUMB -; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7EM-THUMB +; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V7-BE +; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6-THUMB +; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2-DSP +; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2-DSP +; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V7-THUMB-BE +; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6M-THUMB +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V7M-THUMB +; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2-DSP +; RUN: llc -mtriple=armv5te-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V5TE ; Check generated signed and unsigned multiply accumulate long. 
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { @@ -20,12 +21,9 @@ define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { ;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-BE: mov r0, [[RDHI]] ;CHECK-BE: mov r1, [[RDLO]] -;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V6-THUMB2: mov r0, [[RDLO]] -;CHECK-V6-THUMB2: mov r1, [[RDHI]] -;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V7-THUMB: mov r0, [[RDLO]] -;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-T2-DSP: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]] +;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]] ;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] ;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] @@ -44,12 +42,9 @@ define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) { ;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-BE: mov r0, [[RDHI]] ;CHECK-BE: mov r1, [[RDLO]] -;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V6-THUMB2: mov r0, [[RDLO]] -;CHECK-V6-THUMB2: mov r1, [[RDHI]] -;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V7-THUMB: mov r0, [[RDLO]] -;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-T2-DSP: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]] +;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]] ;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] ;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] @@ -78,8 +73,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) { ;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0 ;CHECK-BE: mov r0, [[RDHI]] ;CHECK-BE: mov r1, [[RDLO]] 
-;CHECK-V6-THUMB2: umlal -;CHECK-V7-THUMB: umlal +;CHECK-T2-DSP: umlal ;CHECK-V6-THUMB-NOT: umlal %conv = zext i32 %b to i64 %conv1 = zext i32 %a to i64 @@ -92,8 +86,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) { define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) { ;CHECK-LABEL: MACLongTest4: ;CHECK-V6-THUMB-NOT: smlal -;CHECK-V6-THUMB2: smlal -;CHECK-V7-THUMB: smlal +;CHECK-T2-DSP: smlal ;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31 ;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0 ;CHECK-LE: mov r0, [[RDLO]] @@ -114,14 +107,12 @@ define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) { ;CHECK-LABEL: MACLongTest6: ;CHECK-V6-THUMB-NOT: smull ;CHECK-V6-THUMB-NOT: smlal -;CHECK: smull r12, lr, r1, r0 -;CHECK: smlal r12, lr, r3, r2 +;CHECK-LE: smull r12, lr, r1, r0 +;CHECK-LE: smlal r12, lr, r3, r2 ;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0 ;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]] -;CHECK-V7-THUMB: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0 -;CHECK-V7-THUMB: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]] -;CHECK-V6-THUMB2: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0 -;CHECK-V6-THUMB2: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]] +;CHECK-T2-DSP: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0 +;CHECK-T2-DSP: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]] %conv = sext i32 %a to i64 %conv1 = sext i32 %b to i64 %mul = mul nsw i64 %conv1, %conv @@ -172,18 +163,12 @@ define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) { ;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-V7-BE: mov r0, [[RDHI]] ;CHECK-V7-BE: mov r1, [[RDLO]] -;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V6-THUMB2: mov r0, [[RDLO]] -;CHECK-V6-THUMB2: mov r1, [[RDHI]] -;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V7-THUMB: mov r0, [[RDLO]] 
-;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-T2-DSP: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]] +;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]] ;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] ;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] -;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V7EM-THUMB: mov r0, [[RDLO]] -;CHECK-V7EM-THUMB: mov r1, [[RDHI]] ;CHECK-NOT:umaal ;CHECK-V6-THUMB-NOT: umaal ;CHECK-V6M-THUMB-NOT: umaal @@ -206,18 +191,12 @@ define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) { ;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-V7-BE: mov r0, [[RDHI]] ;CHECK-V7-BE: mov r1, [[RDLO]] -;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V6-THUMB2: mov r0, [[RDLO]] -;CHECK-V6-THUMB2: mov r1, [[RDHI]] -;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V7-THUMB: mov r0, [[RDLO]] -;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-T2-DSP: umaal r2, r3, r1, r0 +;CHECK-T2-DSP-NEXT: mov r0, r2 +;CHECK-T2-DSP-NEXT: mov r1, r3 ;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] ;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] ;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] -;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] -;CHECK-V7EM-THUMB: mov r0, [[RDLO]] -;CHECK-V7EM-THUMB: mov r1, [[RDHI]] ;CHECK-NOT:umaal ;CHECK-V6-THUMB-NOT:umaal ;CHECK-V6M-THUMB-NOT: umaal @@ -231,3 +210,188 @@ define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) { %add2 = add i64 %add, %mul ret i64 %add2 } + +define i64 @MACLongTest11(i16 %a, i16 %b, i64 %c) { +;CHECK-LABEL: MACLongTest11: +;CHECK-T2-DSP-NOT: sxth +;CHECK-T2-DSP: smlalbb r2, r3 +;CHECK-T2-DSP-NEXT: mov r0, r2 
+;CHECK-T2-DSP-NEXT: mov r1, r3 +;CHECK-V5TE-NOT: sxth +;CHECK-V5TE: smlalbb r2, r3 +;CHECK-V5TE-NEXT: mov r0, r2 +;CHECK-V5TE-NEXT: mov r1, r3 +;CHECK-V7-LE-NOT: sxth +;CHECK-V7-LE: smlalbb r2, r3 +;CHECK-V7-LE-NEXT: mov r0, r2 +;CHECK-V7-LE-NEXT: mov r1, r3 +;CHECK-V7-THUMB-BE: smlalbb r3, r2 +;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 +;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 +;CHECK-LE-NOT: smlalbb +;CHECK-BE-NOT: smlalbb +;CHECK-V6M-THUMB-NOT: smlalbb +;CHECK-V7M-THUMB-NOT: smlalbb + %conv = sext i16 %a to i32 + %conv1 = sext i16 %b to i32 + %mul = mul nsw i32 %conv1, %conv + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +define i64 @MACLongTest12(i16 %b, i32 %t, i64 %c) { +;CHECK-LABEL: MACLongTest12: +;CHECK-T2-DSP-NOT: sxth +;CHECK-T2-DSP-NOT: {{asr|lsr}} +;CHECK-T2-DSP: smlalbt r2, r3, r0, r1 +;CHECK-T2-DSP-NEXT: mov r0, r2 +;CHECK-T2-DSP-NEXT: mov r1, r3 +;CHECK-T2-DSP-NOT: sxth +;CHECK-V5TE-NOT: sxth +;CHECK-V5TE-NOT: {{asr|lsr}} +;CHECK-V5TE: smlalbt r2, r3, r0, r1 +;CHECK-V5TE-NEXT: mov r0, r2 +;CHECK-V5TE-NEXT: mov r1, r3 +;CHECK-V7-LE-NOT: sxth +;CHECK-V7-LE-NOT: {{asr|lsr}} +;CHECK-V7-LE: smlalbt r2, r3, r0, r1 +;CHECK-V7-LE-NEXT: mov r0, r2 +;CHECK-V7-LE-NEXT: mov r1, r3 +;CHECK-V7-THUMB-BE: smlalbt r3, r2, +;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 +;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 +;CHECK-LE-NOT: smlalbt +;CHECK-BE-NOT: smlalbt +;CHECK-V6M-THUMB-NOT: smlalbt +;CHECK-V7M-THUMB-NOT: smlalbt + %conv0 = sext i16 %b to i32 + %conv1 = ashr i32 %t, 16 + %mul = mul nsw i32 %conv0, %conv1 + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +define i64 @MACLongTest13(i32 %t, i16 %b, i64 %c) { +;CHECK-LABEL: MACLongTest13: +;CHECK-T2-DSP-NOT: sxth +;CHECK-T2-DSP-NOT: {{asr|lsr}} +;CHECK-T2-DSP: smlaltb r2, r3, r0, r1 +;CHECK-T2-DSP-NEXT: mov r0, r2 +;CHECK-T2-DSP-NEXT: mov r1, r3 +;CHECK-V5TE-NOT: sxth +;CHECK-V5TE-NOT: {{asr|lsr}} +;CHECK-V5TE: smlaltb r2, r3, r0, r1 +;CHECK-V5TE-NEXT: mov r0, r2 
+;CHECK-V5TE-NEXT: mov r1, r3 +;CHECK-V7-LE-NOT: sxth +;CHECK-V7-LE-NOT: {{asr|lsr}} +;CHECK-V7-LE: smlaltb r2, r3, r0, r1 +;CHECK-V7-LE-NEXT: mov r0, r2 +;CHECK-V7-LE-NEXT: mov r1, r3 +;CHECK-V7-THUMB-BE: smlaltb r3, r2, r0, r1 +;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 +;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 +;CHECK-LE-NOT: smlaltb +;CHECK-BE-NOT: smlaltb +;CHECK-V6M-THUMB-NOT: smlaltb +;CHECK-V7M-THUMB-NOT: smlaltb + %conv0 = ashr i32 %t, 16 + %conv1= sext i16 %b to i32 + %mul = mul nsw i32 %conv0, %conv1 + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +define i64 @MACLongTest14(i32 %a, i32 %b, i64 %c) { +;CHECK-LABEL: MACLongTest14: +;CHECK-T2-DSP-NOT: {{asr|lsr}} +;CHECK-T2-DSP: smlaltt r2, r3, +;CHECK-T2-DSP-NEXT: mov r0, r2 +;CHECK-T2-DSP-NEXT: mov r1, r3 +;CHECK-V5TE-NOT: {{asr|lsr}} +;CHECK-V5TE: smlaltt r2, r3, +;CHECK-V5TE-NEXT: mov r0, r2 +;CHECK-V5TE-NEXT: mov r1, r3 +;CHECK-V7-LE-NOT: {{asr|lsr}} +;CHECK-V7-LE: smlaltt r2, r3, +;CHECK-V7-LE-NEXT: mov r0, r2 +;CHECK-V7-LE-NEXT: mov r1, r3 +;CHECK-V7-THUMB-BE: smlaltt r3, r2, +;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 +;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 +;CHECK-LE-NOT: smlaltt +;CHECK-BE-NOT: smlaltt +;CHECK-V6M-THUMB-NOT: smlaltt +;CHECK-V7M-THUMB-NOT: smlaltt + %conv0 = ashr i32 %a, 16 + %conv1 = ashr i32 %b, 16 + %mul = mul nsw i32 %conv1, %conv0 + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +@global_b = external global i16, align 2 +;CHECK-LABEL: MACLongTest15 +;CHECK-T2-DSP-NOT: {{asr|lsr}} +;CHECK-T2-DSP: smlaltb r2, r3, r0, r1 +;CHECK-T2-DSP-NEXT: mov r0, r2 +;CHECK-T2-DSP-NEXT: mov r1, r3 +;CHECK-V5TE-NOT: {{asr|lsr}} +;CHECK-V5TE: smlaltb r2, r3, r0, r1 +;CHECK-V5TE-NEXT: mov r0, r2 +;CHECK-V5TE-NEXT: mov r1, r3 +;CHECK-V7-LE-NOT: {{asr|lsr}} +;CHECK-V7-LE: smlaltb r2, r3, r0, r1 +;CHECK-V7-LE-NEXT: mov r0, r2 +;CHECK-V7-LE-NEXT: mov r1, r3 +;CHECK-V7-THUMB-BE: smlaltb r3, r2, r0, r1 +;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 +;CHECK-LE-NOT: smlaltb +;CHECK-BE-NOT: smlaltb +;CHECK-V6M-THUMB-NOT: smlaltb +;CHECK-V7M-THUMB-NOT: smlaltb +define i64 @MACLongTest15(i32 %t, i64 %acc) { +entry: + %0 = load i16, i16* @global_b, align 2 + %conv = sext i16 %0 to i32 + %shr = ashr i32 %t, 16 + %mul = mul nsw i32 %shr, %conv + %conv1 = sext i32 %mul to i64 + %add = add nsw i64 %conv1, %acc + ret i64 %add +} + +;CHECK-LABEL: MACLongTest16 +;CHECK-T2-DSP-NOT: {{asr|lsr}} +;CHECK-T2-DSP: smlalbt r2, r3, r1, r0 +;CHECK-T2-DSP-NEXT: mov r0, r2 +;CHECK-T2-DSP-NEXT: mov r1, r3 +;CHECK-V5TE-NOT: {{asr|lsr}} +;CHECK-V5TE: smlalbt r2, r3, r1, r0 +;CHECK-V5TE-NEXT: mov r0, r2 +;CHECK-V5TE-NEXT: mov r1, r3 +;CHECK-V7-LE: smlalbt r2, r3, r1, r0 +;CHECK-V7-LE-NEXT: mov r0, r2 +;CHECK-V7-LE-NEXT: mov r1, r3 +;CHECK-V7-THUMB-BE: smlalbt r3, r2, r1, r0 +;CHECK-V7-THUMB-BE-NEXT: mov r0, r2 +;CHECK-V7-THUMB-BE-NEXT: mov r1, r3 +;CHECK-LE-NOT: smlalbt +;CHECK-BE-NOT: smlalbt +;CHECK-V6M-THUMB-NOT: smlalbt +;CHECK-V7M-THUMB-NOT: smlalbt +define i64 @MACLongTest16(i32 %t, i64 %acc) { +entry: + %0 = load i16, i16* @global_b, align 2 + %conv = sext i16 %0 to i32 + %shr = ashr i32 %t, 16 + %mul = mul nsw i32 %conv, %shr + %conv1 = sext i32 %mul to i64 + %add = add nsw i64 %conv1, %acc + ret i64 %add +} diff --git a/test/CodeGen/ARM/lowerMUL-newload.ll b/test/CodeGen/ARM/lowerMUL-newload.ll new file mode 100644 index 0000000000000..93d765cba1168 --- /dev/null +++ b/test/CodeGen/ARM/lowerMUL-newload.ll @@ -0,0 +1,115 @@ +; RUN: llc < %s -mtriple=arm-eabi -mcpu=krait | FileCheck %s + +define void @func1(i16* %a, i16* %b, i16* %c) { +entry: +; The test case trying to vectorize the pseudo code below. +; a[i] = b[i] + c[i]; +; b[i] = a[i] * c[i]; +; a[i] = b[i] + a[i] * c[i]; +; +; Checking that vector load a[i] for "a[i] = b[i] + a[i] * c[i]" is +; scheduled before the first vector store to "a[i] = b[i] + c[i]". 
+; Checking that there is no vector load a[i] scheduled between the vector +; stores to a[i], otherwise the load of a[i] will be polluted by the first +; vector store to a[i]. +; +; This test case check that the chain information is updated during +; lowerMUL for the new created Load SDNode. + +; CHECK: vldr {{.*}} [r0, #16] +; CHECK: vstr {{.*}} [r0, #16] +; CHECK-NOT: vldr {{.*}} [r0, #16] +; CHECK: vstr {{.*}} [r0, #16] + + %scevgep0 = getelementptr i16, i16* %a, i32 8 + %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>* + %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8 + %scevgep1 = getelementptr i16, i16* %b, i32 8 + %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>* + %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8 + %0 = zext <4 x i16> %vec1 to <4 x i32> + %scevgep2 = getelementptr i16, i16* %c, i32 8 + %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>* + %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8 + %1 = sext <4 x i16> %vec2 to <4 x i32> + %vec3 = add <4 x i32> %1, %0 + %2 = trunc <4 x i32> %vec3 to <4 x i16> + %scevgep3 = getelementptr i16, i16* %a, i32 8 + %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>* + store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8 + %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>* + %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8 + %3 = sext <4 x i16> %vec4 to <4 x i32> + %vec5 = mul <4 x i32> %3, %vec3 + %4 = trunc <4 x i32> %vec5 to <4 x i16> + %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>* + store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8 + %5 = sext <4 x i16> %vec0 to <4 x i32> + %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>* + %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8 + %6 = sext <4 x i16> %vec6 to <4 x i32> + %vec7 = mul <4 x i32> %6, %5 + %vec8 = add <4 x i32> %vec7, %vec5 + %7 = trunc <4 x i32> %vec8 to <4 x i16> + %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>* + store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8 + ret void +} + 
+define void @func2(i16* %a, i16* %b, i16* %c) { +entry: +; The test case trying to vectorize the pseudo code below. +; a[i] = b[i] + c[i]; +; b[i] = a[i] * c[i]; +; a[i] = b[i] + a[i] * c[i] + a[i]; +; +; Checking that vector load a[i] for "a[i] = b[i] + a[i] * c[i] + a[i]" +; is scheduled before the first vector store to "a[i] = b[i] + c[i]". +; Checking that there is no vector load a[i] scheduled between the first +; vector store to a[i] and the vector add of a[i], otherwise the load of +; a[i] will be polluted by the first vector store to a[i]. +; +; This test case check that both the chain and value of the new created +; Load SDNode are updated during lowerMUL. + +; CHECK: vldr {{.*}} [r0, #16] +; CHECK: vstr {{.*}} [r0, #16] +; CHECK-NOT: vldr {{.*}} [r0, #16] +; CHECK: vaddw.s16 +; CHECK: vstr {{.*}} [r0, #16] + + %scevgep0 = getelementptr i16, i16* %a, i32 8 + %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>* + %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8 + %scevgep1 = getelementptr i16, i16* %b, i32 8 + %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>* + %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8 + %0 = zext <4 x i16> %vec1 to <4 x i32> + %scevgep2 = getelementptr i16, i16* %c, i32 8 + %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>* + %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8 + %1 = sext <4 x i16> %vec2 to <4 x i32> + %vec3 = add <4 x i32> %1, %0 + %2 = trunc <4 x i32> %vec3 to <4 x i16> + %scevgep3 = getelementptr i16, i16* %a, i32 8 + %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>* + store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8 + %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>* + %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8 + %3 = sext <4 x i16> %vec4 to <4 x i32> + %vec5 = mul <4 x i32> %3, %vec3 + %4 = trunc <4 x i32> %vec5 to <4 x i16> + %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>* + store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8 + %5 = sext <4 x i16> %vec0 to <4 
x i32> + %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>* + %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8 + %6 = sext <4 x i16> %vec6 to <4 x i32> + %vec7 = mul <4 x i32> %6, %5 + %vec8 = add <4 x i32> %vec7, %vec5 + %vec9 = add <4 x i32> %vec8, %5 + %7 = trunc <4 x i32> %vec9 to <4 x i16> + %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>* + store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8 + ret void +} diff --git a/test/CodeGen/ARM/mature-mc-support.ll b/test/CodeGen/ARM/mature-mc-support.ll index 0a7e5b91adc5f..f89657dd81ac3 100644 --- a/test/CodeGen/ARM/mature-mc-support.ll +++ b/test/CodeGen/ARM/mature-mc-support.ll @@ -9,4 +9,4 @@ module asm " .this_directive_is_very_unlikely_to_exist" -; CHECK: LLVM ERROR: Error parsing inline asm +; CHECK: error: unknown directive diff --git a/test/CodeGen/ARM/misched-fp-basic.ll b/test/CodeGen/ARM/misched-fp-basic.ll new file mode 100644 index 0000000000000..27ad2cec34fd6 --- /dev/null +++ b/test/CodeGen/ARM/misched-fp-basic.ll @@ -0,0 +1,69 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +; +; Check the latency of instructions for processors with sched-models +; +; Function Attrs: norecurse nounwind readnone +define i32 @foo(float %a, float %b, float %c, i32 %d) local_unnamed_addr #0 { +entry: +; +; CHECK: ********** MI Scheduling ********** +; CHECK_A9: VADDS +; CHECK_SWIFT: VADDfd +; CHECK_R52: VADDS +; CHECK_A9: Latency : 5 +; CHECK_SWIFT: Latency 
: 4 +; CHECK_R52: Latency : 6 +; +; CHECK_A9: VMULS +; CHECK_SWIFT: VMULfd +; CHECK_R52: VMULS +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 6 +; CHECK_R52: Latency : 6 +; +; CHECK: VDIVS +; CHECK_SWIFT: Latency : 17 +; CHECK_A9: Latency : 16 +; CHECK_R52: Latency : 7 +; +; CHECK: VCVTDS +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 5 +; CHECK_R52: Latency : 6 +; +; CHECK: VADDD +; CHECK_SWIFT: Latency : 6 +; CHECK_A9: Latency : 5 +; CHECK_R52: Latency : 6 +; +; CHECK: VMULD +; CHECK_SWIFT: Latency : 6 +; CHECK_A9: Latency : 7 +; CHECK_R52: Latency : 6 +; +; CHECK: VDIVD +; CHECK_SWIFT: Latency : 32 +; CHECK_A9: Latency : 26 +; CHECK_R52: Latency : 17 +; +; CHECK: VTOSIZD +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 5 +; CHECK_R52: Latency : 6 +; + %add = fadd float %a, %b + %mul = fmul float %add, %add + %div = fdiv float %mul, %b + %conv1 = fpext float %div to double + %add3 = fadd double %conv1, %conv1 + %mul4 = fmul double %add3, %add3 + %div5 = fdiv double %mul4, %conv1 + %conv6 = fptosi double %div5 to i32 + ret i32 %conv6 +} diff --git a/test/CodeGen/ARM/misched-int-basic-thumb2.mir b/test/CodeGen/ARM/misched-int-basic-thumb2.mir new file mode 100644 index 0000000000000..86ef1e26f6368 --- /dev/null +++ b/test/CodeGen/ARM/misched-int-basic-thumb2.mir @@ -0,0 +1,175 @@ +# Basic machine sched model test for Thumb2 int instructions +# RUN: llc -o /dev/null %s -mtriple=thumbv7-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +# RUN: llc -o /dev/null %s -mtriple=thumbv7--eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +# RUN: llc -o /dev/null %s -mtriple=thumbv8r-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s 
--check-prefix=CHECK --check-prefix=CHECK_R52 +# REQUIRES: asserts +--- | + ; ModuleID = 'foo.ll' + source_filename = "foo.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7---eabi" + + @g1 = common global i32 0, align 4 + @g2 = common global i32 0, align 4 + + define i64 @foo(i16 signext %a, i16 signext %b) { + entry: + %0 = load i32, i32* @g1, align 4 + %1 = load i32, i32* @g2, align 4 + %2 = add nuw nsw i32 %0, %0 + %3 = sdiv i32 %2, %1 + store i32 %3, i32* @g1, align 4 + %d = mul nsw i16 %a, %a + %e = mul nsw i16 %b, %b + %f = add nuw nsw i16 %e, %d + %c = zext i16 %f to i32 + %mul8 = mul nsw i32 %c, %3 + %mul9 = mul nsw i32 %mul8, %mul8 + %add10 = add nuw nsw i32 %mul9, %mul8 + %conv1130 = zext i32 %add10 to i64 + %mul12 = mul nuw nsw i64 %conv1130, %conv1130 + %mul13 = mul nsw i64 %mul12, %mul12 + %add14 = add nuw nsw i64 %mul13, %mul12 + ret i64 %add14 + } +# +# CHECK: ********** MI Scheduling ********** +# CHECK: SU(2): %vreg2<def> = t2MOVi32imm <ga:@g1>; rGPR:%vreg2 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 2 +# CHECK_R52: Latency : 2 +# +# CHECK: SU(3): %vreg3<def> = t2LDRi12 %vreg2, 0, pred:14, pred:%noreg; mem:LD4[@g1](dereferenceable) rGPR:%vreg3,%vreg2 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 3 +# CHECK_R52: Latency : 4 +# +# CHECK : SU(6): %vreg6<def> = t2ADDrr %vreg3, %vreg3, pred:14, pred:%noreg, opt:%noreg; rGPR:%vreg6,%vreg3,%vreg3 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 1 +# CHECK_R52: Latency : 3 + +# CHECK: SU(7): %vreg7<def> = t2SDIV %vreg6, %vreg5, pred:14, pred:%noreg; rGPR:%vreg7,%vreg6,%vreg5 +# CHECK_A9: Latency : 0 +# CHECK_SWIFT: Latency : 14 +# CHECK_R52: Latency : 8 + +# CHECK: SU(8): t2STRi12 %vreg7, %vreg2, 0, pred:14, pred:%noreg; mem:ST4[@g1] rGPR:%vreg7,%vreg2 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 0 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(9): %vreg8<def> = t2SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; rGPR:%vreg8,%vreg1,%vreg1 +# 
CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(10): %vreg9<def> = t2SMLABB %vreg0, %vreg0, %vreg8, pred:14, pred:%noreg; rGPR:%vreg9,%vreg0,%vreg0,%vreg8 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(11): %vreg10<def> = t2UXTH %vreg9, 0, pred:14, pred:%noreg; rGPR:%vreg10,%vreg9 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 1 +# CHECK_R52: Latency : 3 +# +# CHECK: SU(12): %vreg11<def> = t2MUL %vreg10, %vreg7, pred:14, pred:%noreg; rGPR:%vreg11,%vreg10,%vreg7 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(13): %vreg12<def> = t2MLA %vreg11, %vreg11, %vreg11, pred:14, pred:%noreg; rGPR:%vreg12,%vreg11,%vreg11,%vreg11 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(14): %vreg13<def>, %vreg14<def> = t2UMULL %vreg12, %vreg12, pred:14, pred:%noreg; rGPR:%vreg13,%vreg14,%vreg12,%vreg12 +# CHECK_A9: Latency : 3 +# CHECK_SWIFT: Latency : 5 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(18): %vreg19<def,tied4>, %vreg20<def,tied5> = t2UMLAL %vreg12, %vreg12, %vreg19<tied0>, %vreg20<tied1>, pred:14, pred:%noreg; rGPR:%vreg19,%vreg20,%vreg12,%vreg12,%vreg20 +# CHECK_A9: Latency : 3 +# CHECK_SWIFT: Latency : 7 +# CHECK_R52: Latency : 4 +# CHECK: ** ScheduleDAGMILive::schedule picking next node +... 
+--- +name: foo +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: rgpr } + - { id: 1, class: rgpr } + - { id: 2, class: rgpr } + - { id: 3, class: rgpr } + - { id: 4, class: rgpr } + - { id: 5, class: rgpr } + - { id: 6, class: rgpr } + - { id: 7, class: rgpr } + - { id: 8, class: rgpr } + - { id: 9, class: rgpr } + - { id: 10, class: rgpr } + - { id: 11, class: rgpr } + - { id: 12, class: rgpr } + - { id: 13, class: rgpr } + - { id: 14, class: rgpr } + - { id: 15, class: rgpr } + - { id: 16, class: rgpr } + - { id: 17, class: rgpr } + - { id: 18, class: rgpr } + - { id: 19, class: rgpr } + - { id: 20, class: rgpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r0, %r1 + + %1 = COPY %r1 + %0 = COPY %r0 + %2 = t2MOVi32imm @g1 + %3 = t2LDRi12 %2, 0, 14, _ :: (dereferenceable load 4 from @g1) + %4 = t2MOVi32imm @g2 + %5 = t2LDRi12 %4, 0, 14, _ :: (dereferenceable load 4 from @g2) + %6 = t2ADDrr %3, %3, 14, _, _ + %7 = t2SDIV %6, %5, 14, _ + t2STRi12 %7, %2, 0, 14, _ :: (store 4 into @g1) + %8 = t2SMULBB %1, %1, 14, _ + %9 = t2SMLABB %0, %0, %8, 14, _ + %10 = t2UXTH %9, 0, 14, _ + %11 = t2MUL %10, %7, 14, _ + %12 = t2MLA %11, %11, %11, 14, _ + %13, %14 = t2UMULL %12, %12, 14, _ + %19, %16 = t2UMULL %13, %13, 14, _ + %17 = t2MLA %13, %14, %16, 14, _ + %20 = t2MLA %13, %14, %17, 14, _ + %19, %20 = t2UMLAL %12, %12, %19, %20, 14, _ + %r0 = COPY %19 + %r1 = COPY %20 + tBX_RET 14, _, implicit %r0, implicit %r1 + +... 
diff --git a/test/CodeGen/ARM/misched-int-basic.mir b/test/CodeGen/ARM/misched-int-basic.mir new file mode 100644 index 0000000000000..f237c0a07b2ed --- /dev/null +++ b/test/CodeGen/ARM/misched-int-basic.mir @@ -0,0 +1,128 @@ +# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +# REQUIRES: asserts +--- | + ; ModuleID = 'foo.ll' + source_filename = "foo.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "arm---eabi" + + define i64 @foo(i16 signext %a, i16 signext %b) { + entry: + %d = mul nsw i16 %a, %a + %e = mul nsw i16 %b, %b + %f = add nuw nsw i16 %e, %d + %c = zext i16 %f to i32 + %mul8 = mul nsw i32 %c, %c + %mul9 = mul nsw i32 %mul8, %mul8 + %add10 = add nuw nsw i32 %mul9, %mul8 + %conv1130 = zext i32 %add10 to i64 + %mul12 = mul nuw nsw i64 %conv1130, %conv1130 + %mul13 = mul nsw i64 %mul12, %mul12 + %add14 = add nuw nsw i64 %mul13, %mul12 + ret i64 %add14 + } + +# CHECK: ********** MI Scheduling ********** +# CHECK: SU(2): %vreg2<def> = SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; GPR:%vreg2,%vreg1,%vreg1 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(3): %vreg3<def> = SMLABB %vreg0, %vreg0, %vreg2, pred:14, pred:%noreg; GPRnopc:%vreg3,%vreg0,%vreg0 GPR:%vreg2 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(4): %vreg4<def> = UXTH 
%vreg3, 0, pred:14, pred:%noreg; GPRnopc:%vreg4,%vreg3 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 1 +# CHECK_R52: Latency : 3 +# +# CHECK: SU(5): %vreg5<def> = MUL %vreg4, %vreg4, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg5,%vreg4,%vreg4 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(6): %vreg6<def> = MLA %vreg5, %vreg5, %vreg5, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg6,%vreg5,%vreg5,%vreg5 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(7): %vreg7<def>, %vreg8<def> = UMULL %vreg6, %vreg6, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg7,%vreg8,%vreg6,%vreg6 +# CHECK_A9: Latency : 3 +# CHECK_SWIFT: Latency : 5 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(11): %vreg13<def,tied4>, %vreg14<def,tied5> = UMLAL %vreg6, %vreg6, %vreg13<tied0>, %vreg14<tied1>, pred:14, pred:%noreg, opt:%noreg; GPR:%vreg13 GPRnopc:%vreg14,%vreg6,%vreg6 +# CHECK_SWIFT: Latency : 7 +# CHECK_A9: Latency : 3 +# CHECK_R52: Latency : 4 +# CHECK: ** ScheduleDAGMILive::schedule picking next node +... 
+--- +name: foo +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gprnopc } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gprnopc } + - { id: 4, class: gprnopc } + - { id: 5, class: gprnopc } + - { id: 6, class: gprnopc } + - { id: 7, class: gprnopc } + - { id: 8, class: gprnopc } + - { id: 9, class: gpr } + - { id: 10, class: gprnopc } + - { id: 11, class: gprnopc } + - { id: 12, class: gprnopc } + - { id: 13, class: gpr } + - { id: 14, class: gprnopc } +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r0, %r1 + + %1 = COPY %r1 + %0 = COPY %r0 + %2 = SMULBB %1, %1, 14, _ + %3 = SMLABB %0, %0, %2, 14, _ + %4 = UXTH %3, 0, 14, _ + %5 = MUL %4, %4, 14, _, _ + %6 = MLA %5, %5, %5, 14, _, _ + %7, %8 = UMULL %6, %6, 14, _, _ + %13, %10 = UMULL %7, %7, 14, _, _ + %11 = MLA %7, %8, %10, 14, _, _ + %14 = MLA %7, %8, %11, 14, _, _ + %13, %14 = UMLAL %6, %6, %13, %14, 14, _, _ + %r0 = COPY %13 + %r1 = COPY %14 + BX_RET 14, _, implicit %r0, implicit %r1 + +... 
diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll index da9b698f20996..f51582031bd59 100644 --- a/test/CodeGen/ARM/movt.ll +++ b/test/CodeGen/ARM/movt.ll @@ -2,10 +2,15 @@ ; rdar://7317664 ; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8m.base -mcpu=cortex-m23 %s -o - | FileCheck %s --check-prefix=NOMOVT +; RUN: llc -mtriple=thumbv8m.base -mcpu=cortex-m33 %s -o - | FileCheck %s define i32 @t(i32 %X) nounwind { ; CHECK-LABEL: t: ; CHECK: movt r{{[0-9]}}, #65535 +; NOMOVT-LABEL: t: +; NOMOVT-NOT: movt r{{[0-9]}}, #65535 +; NOMOVT: ldr r{{[0-9]}}, .LCP entry: %0 = or i32 %X, -65536 ret i32 %0 @@ -14,6 +19,9 @@ entry: define i32 @t2(i32 %X) nounwind { ; CHECK-LABEL: t2: ; CHECK: movt r{{[0-9]}}, #65534 +; NOMOVT-LABEL: t2: +; NOMOVT-NOT: movt r{{[0-9]}}, #65534 +; NOMOVT: ldr r{{[0-9]}}, .LCP entry: %0 = or i32 %X, -131072 %1 = and i32 %0, -65537 diff --git a/test/CodeGen/ARM/msr-it-block.ll b/test/CodeGen/ARM/msr-it-block.ll index 0f9ff6b29d795..8d4ddc3a49853 100644 --- a/test/CodeGen/ARM/msr-it-block.ll +++ b/test/CodeGen/ARM/msr-it-block.ll @@ -20,8 +20,8 @@ write_reg: ; V6M: msr apsr, {{r[0-9]+}} ; V7M: msr apsr_nzcvq, {{r[0-9]+}} ; V7M: msr apsr_nzcvq, {{r[0-9]+}} -; V7A: msr APSR_nzcvqg, {{r[0-9]+}} -; V7A: msr APSR_nzcvqg, {{r[0-9]+}} +; V7A: msr APSR_nzcvq, {{r[0-9]+}} +; V7A: msr APSR_nzcvq, {{r[0-9]+}} br label %exit exit: @@ -41,8 +41,8 @@ write_reg: ; V6M: msr apsr, {{r[0-9]+}} ; V7M: msr apsr_nzcvq, {{r[0-9]+}} ; V7M: msr apsr_nzcvq, {{r[0-9]+}} -; V7A: msr APSR_nzcvqg, {{r[0-9]+}} -; V7A: msr APSR_nzcvqg, {{r[0-9]+}} +; V7A: msr APSR_nzcvq, {{r[0-9]+}} +; V7A: msr APSR_nzcvq, {{r[0-9]+}} br label %exit exit: diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll index d32e7b78879ba..109d09582afdc 100644 --- a/test/CodeGen/ARM/neon_vabs.ll +++ b/test/CodeGen/ARM/neon_vabs.ll @@ -1,8 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc 
-mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s define <4 x i32> @test1(<4 x i32> %a) nounwind { ; CHECK-LABEL: test1: -; CHECK: vabs.s32 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg @@ -11,7 +18,13 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind { define <4 x i32> @test2(<4 x i32> %a) nounwind { ; CHECK-LABEL: test2: -; CHECK: vabs.s32 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sge <4 x i32> %a, zeroinitializer %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg @@ -20,7 +33,13 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind { define <8 x i16> @test3(<8 x i16> %a) nounwind { ; CHECK-LABEL: test3: -; CHECK: vabs.s16 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s16 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <8 x i16> zeroinitializer, %a %b = icmp sgt <8 x i16> %a, zeroinitializer %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg @@ -29,7 +48,13 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind { define <16 x i8> @test4(<16 x i8> %a) nounwind { ; CHECK-LABEL: test4: -; CHECK: vabs.s8 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s8 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <16 x i8> zeroinitializer, %a %b = icmp slt <16 x i8> %a, zeroinitializer %abs = 
select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a @@ -38,7 +63,13 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind { define <4 x i32> @test5(<4 x i32> %a) nounwind { ; CHECK-LABEL: test5: -; CHECK: vabs.s32 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sle <4 x i32> %a, zeroinitializer %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a @@ -47,7 +78,11 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind { define <2 x i32> @test6(<2 x i32> %a) nounwind { ; CHECK-LABEL: test6: -; CHECK: vabs.s32 d +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s32 d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <2 x i32> zeroinitializer, %a %b = icmp sgt <2 x i32> %a, <i32 -1, i32 -1> %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg @@ -56,7 +91,11 @@ define <2 x i32> @test6(<2 x i32> %a) nounwind { define <2 x i32> @test7(<2 x i32> %a) nounwind { ; CHECK-LABEL: test7: -; CHECK: vabs.s32 d +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s32 d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <2 x i32> zeroinitializer, %a %b = icmp sge <2 x i32> %a, zeroinitializer %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg @@ -65,7 +104,11 @@ define <2 x i32> @test7(<2 x i32> %a) nounwind { define <4 x i16> @test8(<4 x i16> %a) nounwind { ; CHECK-LABEL: test8: -; CHECK: vabs.s16 d +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s16 d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <4 x i16> zeroinitializer, %a %b = icmp sgt <4 x i16> %a, zeroinitializer %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg @@ -74,7 +117,11 @@ define <4 x i16> @test8(<4 x i16> %a) nounwind { define <8 x 
i8> @test9(<8 x i8> %a) nounwind { ; CHECK-LABEL: test9: -; CHECK: vabs.s8 d +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s8 d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <8 x i8> zeroinitializer, %a %b = icmp slt <8 x i8> %a, zeroinitializer %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a @@ -83,7 +130,11 @@ define <8 x i8> @test9(<8 x i8> %a) nounwind { define <2 x i32> @test10(<2 x i32> %a) nounwind { ; CHECK-LABEL: test10: -; CHECK: vabs.s32 d +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vabs.s32 d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: mov pc, lr %tmp1neg = sub <2 x i32> zeroinitializer, %a %b = icmp sle <2 x i32> %a, zeroinitializer %abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a @@ -95,7 +146,13 @@ define <2 x i32> @test10(<2 x i32> %a) nounwind { define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind { ; CHECK-LABEL: test11: -; CHECK: vabdl.u16 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r2, r3 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vabdl.u16 q8, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %zext1 = zext <4 x i16> %a to <4 x i32> %zext2 = zext <4 x i16> %b to <4 x i32> %diff = sub <4 x i32> %zext1, %zext2 @@ -106,7 +163,13 @@ define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind { } define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind { ; CHECK-LABEL: test12: -; CHECK: vabdl.u8 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r2, r3 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vabdl.u8 q8, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %zext1 = zext <8 x i8> %a to <8 x i16> %zext2 = zext <8 x i8> %b to <8 x i16> %diff = sub <8 x i16> %zext1, %zext2 @@ -118,7 +181,13 @@ define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind { define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind { ; CHECK-LABEL: 
test13: -; CHECK: vabdl.u32 q +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d16, r2, r3 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vabdl.u32 q8, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %zext1 = zext <2 x i32> %a to <2 x i64> %zext2 = zext <2 x i32> %b to <2 x i64> %diff = sub <2 x i64> %zext1, %zext2 diff --git a/test/CodeGen/ARM/no-cmov2bfi.ll b/test/CodeGen/ARM/no-cmov2bfi.ll new file mode 100644 index 0000000000000..c8b5120489054 --- /dev/null +++ b/test/CodeGen/ARM/no-cmov2bfi.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=thumbv7 | FileCheck --check-prefix=CHECK-NOBFI %s + +declare zeroext i1 @dummy() + +define i8 @test(i8 %a1, i1 %c) { +; CHECK-NOBFI-NOT: bfi +; CHECK-NOBFI: bl dummy +; CHECK-NOBFI: cmp r0, #0 +; CHECK-NOBFI: it ne +; CHECK-NOBFI: orrne [[REG:r[0-9]+]], [[REG]], #8 +; CHECK-NOBFI: mov r0, [[REG]] + + %1 = and i8 %a1, -9 + %2 = select i1 %c, i8 %1, i8 %a1 + %3 = tail call zeroext i1 @dummy() + %4 = or i8 %2, 8 + %ret = select i1 %3, i8 %4, i8 %2 + ret i8 %ret +} diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll index ff85052175c85..568f7572b32e9 100644 --- a/test/CodeGen/ARM/phi.ll +++ b/test/CodeGen/ARM/phi.ll @@ -1,5 +1,4 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s ; <rdar://problem/8686347> diff --git a/test/CodeGen/ARM/pr32545.ll b/test/CodeGen/ARM/pr32545.ll new file mode 100644 index 0000000000000..5bfb01b45983b --- /dev/null +++ b/test/CodeGen/ARM/pr32545.ll @@ -0,0 +1,22 @@ +; RUN: llc %s -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabi" + +; CHECK: vld1.16 {[[DREG:d[0-9]+]][0]}, {{.*}} +; CHECK: vmovl.u8 [[QREG:q[0-9]+]], [[DREG]] +; CHECK: vmovl.u16 [[QREG]], [[DREG]] + +define void @f(i32 %dstStride, i8* %indvars.iv, <2 x i8>* %zz) { +entry: + br label %for.body + +for.body: + 
%tmp = load <2 x i8>, <2 x i8>* %zz, align 1 + %tmp1 = extractelement <2 x i8> %tmp, i32 0 + %.lhs.rhs = zext i8 %tmp1 to i32 + call void @g(i32 %.lhs.rhs) + br label %for.body +} + +declare void @g(i32) diff --git a/test/CodeGen/ARM/prera-ldst-aliasing.mir b/test/CodeGen/ARM/prera-ldst-aliasing.mir new file mode 100644 index 0000000000000..ce37106ed8d2f --- /dev/null +++ b/test/CodeGen/ARM/prera-ldst-aliasing.mir @@ -0,0 +1,40 @@ +# RUN: llc -run-pass arm-prera-ldst-opt %s -o - | FileCheck %s +--- | + target triple = "thumbv7---eabi" + + define void @ldrd_strd_aa(i32* noalias nocapture %x, i32* noalias nocapture readonly %y) { + entry: + %0 = load i32, i32* %y, align 4 + store i32 %0, i32* %x, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %y, i32 1 + %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 1 + store i32 %1, i32* %arrayidx3, align 4 + ret void + } +... +--- +name: ldrd_strd_aa +alignment: 1 +tracksRegLiveness: true +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: %r0, %r1 + + %1 : gpr = COPY %r1 + %0 : gpr = COPY %r0 + %2 : gpr = t2LDRi12 %1, 0, 14, _ :: (load 4 from %ir.y) + t2STRi12 killed %2, %0, 0, 14, _ :: (store 4 into %ir.x) + %3 : gpr = t2LDRi12 %1, 4, 14, _ :: (load 4 from %ir.arrayidx2) + t2STRi12 killed %3, %0, 4, 14, _ :: (store 4 into %ir.arrayidx3) + ; CHECK: t2LDRi12 + ; CHECK-NEXT: t2LDRi12 + ; CHECK-NEXT: t2STRi12 + ; CHECK-NEXT: t2STRi12 + tBX_RET 14, _ + +... 
+ diff --git a/test/CodeGen/ARM/prera-ldst-insertpt.mir b/test/CodeGen/ARM/prera-ldst-insertpt.mir new file mode 100644 index 0000000000000..eafcc7c36d334 --- /dev/null +++ b/test/CodeGen/ARM/prera-ldst-insertpt.mir @@ -0,0 +1,105 @@ +# RUN: llc -run-pass arm-prera-ldst-opt %s -o - | FileCheck %s +--- | + target triple = "thumbv7---eabi" + + define void @a(i32* nocapture %x, i32 %y, i32 %z) { + entry: + ret void + } + + define void @b(i32* nocapture %x, i32 %y, i32 %z) { + entry: + ret void + } +... +--- +# CHECK-LABEL: name: a +name: a +alignment: 1 +tracksRegLiveness: true +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } + - { reg: '%r2', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: %r0, %r1, %r2 + + %2 : rgpr = COPY %r2 + %1 : rgpr = COPY %r1 + %0 : gpr = COPY %r0 + %3 : rgpr = t2MUL %2, %2, 14, _ + %4 : rgpr = t2MUL %1, %1, 14, _ + %5 : rgpr = t2MOVi32imm -858993459 + %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _ + %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, _ + t2STRi12 %1, %0, 0, 14, _ :: (store 4) + %10 : rgpr = t2LSLri %2, 1, 14, _, _ + t2STRi12 killed %10, %0, 4, 14, _ :: (store 4) + + ; Make sure we move the paired stores next to each other, and + ; insert them in an appropriate location. + ; CHECK: t2STRi12 %1, + ; CHECK-NEXT: t2STRi12 killed %10, + ; CHECK-NEXT: t2MOVi + ; CHECK-NEXT: t2ADDrs + + %11 : rgpr = t2MOVi 55, 14, _, _ + %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _ + t2STRi12 killed %12, %0, 16, 14, _ :: (store 4) + %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _ + t2STRi12 killed %13, %0, 20, 14, _ :: (store 4) + + ; Make sure we move the paired stores next to each other. 
+ ; CHECK: t2STRi12 killed %12, + ; CHECK-NEXT: t2STRi12 killed %13, + + tBX_RET 14, _ +--- +# CHECK-LABEL: name: b +name: b +alignment: 1 +tracksRegLiveness: true +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } + - { reg: '%r2', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: %r0, %r1, %r2 + + %2 : rgpr = COPY %r2 + %1 : rgpr = COPY %r1 + %0 : gpr = COPY %r0 + t2STRi12 %1, %0, 0, 14, _ :: (store 4) + %10 : rgpr = t2LSLri %2, 1, 14, _, _ + t2STRi12 killed %10, %0, 4, 14, _ :: (store 4) + %3 : rgpr = t2MUL %2, %2, 14, _ + t2STRi12 %3, %0, 8, 14, _ :: (store 4) + + ; Make sure we move the paired stores next to each other, and + ; insert them in an appropriate location. + ; CHECK: t2STRi12 {{.*}}, 0 + ; CHECK-NEXT: t2STRi12 {{.*}}, 4 + ; CHECK-NEXT: t2STRi12 {{.*}}, 8 + ; CHECK-NEXT: t2MUL + ; CHECK-NEXT: t2MOVi32imm + + %4 : rgpr = t2MUL %1, %1, 14, _ + %5 : rgpr = t2MOVi32imm -858993459 + %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _ + %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, _ + %10 : rgpr = t2LSLri %2, 1, 14, _, _ + %11 : rgpr = t2MOVi 55, 14, _, _ + %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _ + t2STRi12 killed %12, %0, 16, 14, _ :: (store 4) + %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _ + t2STRi12 killed %13, %0, 20, 14, _ :: (store 4) + + ; Make sure we move the paired stores next to each other. + ; CHECK: t2STRi12 {{.*}}, 16 + ; CHECK-NEXT: t2STRi12 {{.*}}, 20 + + tBX_RET 14, _ + +... 
diff --git a/test/CodeGen/ARM/rbit.ll b/test/CodeGen/ARM/rbit.ll index a2bfeca75526d..c8badfb32370c 100644 --- a/test/CodeGen/ARM/rbit.ll +++ b/test/CodeGen/ARM/rbit.ll @@ -10,7 +10,8 @@ entry: ; CHECK-LABEL: rbit_constant ; CHECK: mov r0, #0 -; CHECK: rbit r0, r0 +; CHECK-NOT: rbit +; CHECK: bx lr define i32 @rbit_constant() { entry: %rbit.i = call i32 @llvm.arm.rbit(i32 0) diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index f95f97105b9fc..a36526ff1fb03 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s define i32 @test1(i32 %X) nounwind { -; CHECK: test1 +; CHECK-LABEL: test1 ; CHECK: rev16 r0, r0 %tmp1 = lshr i32 %X, 8 %X15 = bitcast i32 %X to i32 @@ -17,7 +17,7 @@ define i32 @test1(i32 %X) nounwind { } define i32 @test2(i32 %X) nounwind { -; CHECK: test2 +; CHECK-LABEL: test2 ; CHECK: revsh r0, r0 %tmp1 = lshr i32 %X, 8 %tmp1.upgrd.1 = trunc i32 %tmp1 to i16 @@ -58,7 +58,7 @@ entry: ; rdar://9609059 define i32 @test5(i32 %i) nounwind readnone { entry: -; CHECK: test5 +; CHECK-LABEL: test5 ; CHECK: revsh r0, r0 %shl = shl i32 %i, 24 %shr = ashr exact i32 %shl, 16 @@ -71,7 +71,7 @@ entry: ; rdar://9609108 define i32 @test6(i32 %x) nounwind readnone { entry: -; CHECK: test6 +; CHECK-LABEL: test6 ; CHECK: rev16 r0, r0 %and = shl i32 %x, 8 %shl = and i32 %and, 65280 @@ -88,7 +88,7 @@ entry: ; rdar://9164521 define i32 @test7(i32 %a) nounwind readnone { entry: -; CHECK: test7 +; CHECK-LABEL: test7 ; CHECK: rev r0, r0 ; CHECK: lsr r0, r0, #16 %and = lshr i32 %a, 8 @@ -101,7 +101,7 @@ entry: define i32 @test8(i32 %a) nounwind readnone { entry: -; CHECK: test8 +; CHECK-LABEL: test8 ; CHECK: revsh r0, r0 %and = lshr i32 %a, 8 %shr4 = and i32 %and, 255 @@ -115,7 +115,7 @@ entry: ; rdar://10750814 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone { entry: -; CHECK: test9 +; CHECK-LABEL: test9 ; CHECK: rev16 r0, r0 %conv = zext i16 %v to i32 %shr4 = lshr 
i32 %conv, 8 diff --git a/test/CodeGen/ARM/select_const.ll b/test/CodeGen/ARM/select_const.ll new file mode 100644 index 0000000000000..48fe572bf8a72 --- /dev/null +++ b/test/CodeGen/ARM/select_const.ll @@ -0,0 +1,326 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi-unknown-unknown | FileCheck %s + +; Select of constants: control flow / conditional moves can always be replaced by logic+math (but may not be worth it?). +; Test the zeroext/signext variants of each pattern to see if that makes a difference. + +; select Cond, 0, 1 --> zext (!Cond) + +define i32 @select_0_or_1(i1 %cond) { +; CHECK-LABEL: select_0_or_1: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #1 +; CHECK-NEXT: bic r0, r1, r0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 0, i32 1 + ret i32 %sel +} + +define i32 @select_0_or_1_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_0_or_1_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: eor r0, r0, #1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 0, i32 1 + ret i32 %sel +} + +define i32 @select_0_or_1_signext(i1 signext %cond) { +; CHECK-LABEL: select_0_or_1_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #1 +; CHECK-NEXT: bic r0, r1, r0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 0, i32 1 + ret i32 %sel +} + +; select Cond, 1, 0 --> zext (Cond) + +define i32 @select_1_or_0(i1 %cond) { +; CHECK-LABEL: select_1_or_0: +; CHECK: @ BB#0: +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 1, i32 0 + ret i32 %sel +} + +define i32 @select_1_or_0_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_1_or_0_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 1, i32 0 + ret i32 %sel +} + +define i32 @select_1_or_0_signext(i1 signext %cond) { +; CHECK-LABEL: select_1_or_0_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 1, i32 0 + ret i32 %sel +} + +; 
select Cond, 0, -1 --> sext (!Cond) + +define i32 @select_0_or_neg1(i1 %cond) { +; CHECK-LABEL: select_0_or_neg1: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #1 +; CHECK-NEXT: bic r0, r1, r0 +; CHECK-NEXT: rsb r0, r0, #0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 0, i32 -1 + ret i32 %sel +} + +define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_0_or_neg1_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: eor r0, r0, #1 +; CHECK-NEXT: rsb r0, r0, #0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 0, i32 -1 + ret i32 %sel +} + +define i32 @select_0_or_neg1_signext(i1 signext %cond) { +; CHECK-LABEL: select_0_or_neg1_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: mvn r0, r0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 0, i32 -1 + ret i32 %sel +} + +define i32 @select_0_or_neg1_alt(i1 %cond) { +; CHECK-LABEL: select_0_or_neg1_alt: +; CHECK: @ BB#0: +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: sub r0, r0, #1 +; CHECK-NEXT: mov pc, lr + %z = zext i1 %cond to i32 + %add = add i32 %z, -1 + ret i32 %add +} + +define i32 @select_0_or_neg1_alt_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_0_or_neg1_alt_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: sub r0, r0, #1 +; CHECK-NEXT: mov pc, lr + %z = zext i1 %cond to i32 + %add = add i32 %z, -1 + ret i32 %add +} + +define i32 @select_0_or_neg1_alt_signext(i1 signext %cond) { +; CHECK-LABEL: select_0_or_neg1_alt_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: mvn r0, r0 +; CHECK-NEXT: mov pc, lr + %z = zext i1 %cond to i32 + %add = add i32 %z, -1 + ret i32 %add +} + +; select Cond, -1, 0 --> sext (Cond) + +define i32 @select_neg1_or_0(i1 %cond) { +; CHECK-LABEL: select_neg1_or_0: +; CHECK: @ BB#0: +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: rsb r0, r0, #0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 -1, i32 0 + ret i32 %sel +} + +define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_neg1_or_0_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: rsb r0, r0, #0 +; 
CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 -1, i32 0 + ret i32 %sel +} + +define i32 @select_neg1_or_0_signext(i1 signext %cond) { +; CHECK-LABEL: select_neg1_or_0_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 -1, i32 0 + ret i32 %sel +} + +; select Cond, C+1, C --> add (zext Cond), C + +define i32 @select_Cplus1_C(i1 %cond) { +; CHECK-LABEL: select_Cplus1_C: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #41 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: movne r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 42, i32 41 + ret i32 %sel +} + +define i32 @select_Cplus1_C_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_Cplus1_C_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #41 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 42, i32 41 + ret i32 %sel +} + +define i32 @select_Cplus1_C_signext(i1 signext %cond) { +; CHECK-LABEL: select_Cplus1_C_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #41 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: movne r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 42, i32 41 + ret i32 %sel +} + +; select Cond, C, C+1 --> add (sext Cond), C+1 + +define i32 @select_C_Cplus1(i1 %cond) { +; CHECK-LABEL: select_C_Cplus1: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #42 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: movne r1, #41 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 41, i32 42 + ret i32 %sel +} + +define i32 @select_C_Cplus1_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_C_Cplus1_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #42 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r1, #41 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 41, i32 42 + ret i32 %sel +} + +define i32 @select_C_Cplus1_signext(i1 signext %cond) { +; CHECK-LABEL: select_C_Cplus1_signext: +; CHECK: @
BB#0: +; CHECK-NEXT: mov r1, #42 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: movne r1, #41 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 41, i32 42 + ret i32 %sel +} + +; In general, select of 2 constants could be: +; select Cond, C1, C2 --> add (mul (zext Cond), C1-C2), C2 --> add (and (sext Cond), C1-C2), C2 + +define i32 @select_C1_C2(i1 %cond) { +; CHECK-LABEL: select_C1_C2: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #165 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: orr r1, r1, #256 +; CHECK-NEXT: moveq r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 421, i32 42 + ret i32 %sel +} + +define i32 @select_C1_C2_zeroext(i1 zeroext %cond) { +; CHECK-LABEL: select_C1_C2_zeroext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #165 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: orr r1, r1, #256 +; CHECK-NEXT: moveq r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 421, i32 42 + ret i32 %sel +} + +define i32 @select_C1_C2_signext(i1 signext %cond) { +; CHECK-LABEL: select_C1_C2_signext: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #165 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: orr r1, r1, #256 +; CHECK-NEXT: moveq r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i32 421, i32 42 + ret i32 %sel +} + +; 4295032833 = 0x100010001. +; This becomes an opaque constant via ConstantHoisting, so we don't fold it into the select. 
+ +define i64 @opaque_constant1(i1 %cond, i64 %x) { +; CHECK-LABEL: opaque_constant1: +; CHECK: @ BB#0: +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: ands r12, r0, #1 +; CHECK-NEXT: mov lr, #1 +; CHECK-NEXT: mov r0, #23 +; CHECK-NEXT: eor r3, r3, #1 +; CHECK-NEXT: orr lr, lr, #65536 +; CHECK-NEXT: mvnne r0, #3 +; CHECK-NEXT: movne r12, #1 +; CHECK-NEXT: and r4, r0, lr +; CHECK-NEXT: eor r2, r2, lr +; CHECK-NEXT: subs r0, r4, #1 +; CHECK-NEXT: sbc r1, r12, #0 +; CHECK-NEXT: orrs r2, r2, r3 +; CHECK-NEXT: movne r0, r4 +; CHECK-NEXT: movne r1, r12 +; CHECK-NEXT: pop {r4, lr} +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i64 -4, i64 23 + %bo = and i64 %sel, 4295032833 ; 0x100010001 + %cmp = icmp eq i64 %x, 4295032833 + %sext = sext i1 %cmp to i64 + %add = add i64 %bo, %sext + ret i64 %add +} + +; 65537 == 0x10001. +; This becomes an opaque constant via ConstantHoisting, so we don't fold it into the select. + +define i64 @opaque_constant2(i1 %cond, i64 %x) { +; CHECK-LABEL: opaque_constant2: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r1, #1 +; CHECK-NEXT: tst r0, #1 +; CHECK-NEXT: orr r1, r1, #65536 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: moveq r0, #23 +; CHECK-NEXT: and r0, r0, r1 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov pc, lr + %sel = select i1 %cond, i64 65537, i64 23 + %bo = and i64 %sel, 65537 + ret i64 %bo +} + diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 8c1502e146550..09e8ed4bc096a 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -223,21 +223,19 @@ entry: ret i32 %add } -; Do not fold the xor into the select +; Fold the xor into the select. 
define i32 @t15(i32 %p) { entry: ; ARM-LABEL: t15: -; ARM: mov [[REG:r[0-9]+]], #2 +; ARM: mov [[REG:r[0-9]+]], #3 ; ARM: cmp r0, #8 -; ARM: movwgt [[REG:r[0-9]+]], #1 -; ARM: eor r0, [[REG:r[0-9]+]], #1 +; ARM: movwgt [[REG:r[0-9]+]], #0 ; T2-LABEL: t15: -; T2: movs [[REG:r[0-9]+]], #2 +; T2: movs [[REG:r[0-9]+]], #3 ; T2: cmp [[REG:r[0-9]+]], #8 ; T2: it gt -; T2: movgt [[REG:r[0-9]+]], #1 -; T2: eor r0, [[REG:r[0-9]+]], #1 +; T2: movgt [[REG:r[0-9]+]], #0 %cmp = icmp sgt i32 %p, 8 %a = select i1 %cmp, i32 1, i32 2 %xor = xor i32 %a, 1 diff --git a/test/CodeGen/ARM/setcc-logic.ll b/test/CodeGen/ARM/setcc-logic.ll new file mode 100644 index 0000000000000..79bae1facb3e5 --- /dev/null +++ b/test/CodeGen/ARM/setcc-logic.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s + +define zeroext i1 @ne_neg1_and_ne_zero(i32 %x) nounwind { +; CHECK-LABEL: ne_neg1_and_ne_zero: +; CHECK: @ BB#0: +; CHECK-NEXT: add r1, r0, #1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: bx lr + %cmp1 = icmp ne i32 %x, -1 + %cmp2 = icmp ne i32 %x, 0 + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401 + +define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; CHECK-LABEL: and_eq: +; CHECK: @ BB#0: +; CHECK-NEXT: eor r2, r2, r3 +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: orrs r0, r0, r2 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: bx lr + %cmp1 = icmp eq i32 %a, %b + %cmp2 = icmp eq i32 %c, %d + %and = and i1 %cmp1, %cmp2 + ret i1 %and +} + +define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; CHECK-LABEL: or_ne: +; CHECK: @ BB#0: +; CHECK-NEXT: eor r2, r2, r3 +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: orrs r0, r0, r2 +; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: bx lr + %cmp1 = icmp ne i32 %a, %b + %cmp2 = icmp ne i32 %c, %d 
+ %or = or i1 %cmp1, %cmp2 + ret i1 %or +} + +define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind { +; CHECK-LABEL: and_eq_vec: +; CHECK: @ BB#0: +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: add r12, sp, #40 +; CHECK-NEXT: add lr, sp, #8 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vld1.64 {d16, d17}, [lr] +; CHECK-NEXT: add r0, sp, #24 +; CHECK-NEXT: vld1.64 {d20, d21}, [r12] +; CHECK-NEXT: vceq.i32 q8, q9, q8 +; CHECK-NEXT: vld1.64 {d22, d23}, [r0] +; CHECK-NEXT: vceq.i32 q9, q11, q10 +; CHECK-NEXT: vmovn.i32 d16, q8 +; CHECK-NEXT: vmovn.i32 d17, q9 +; CHECK-NEXT: vand d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: pop {r11, pc} + %cmp1 = icmp eq <4 x i32> %a, %b + %cmp2 = icmp eq <4 x i32> %c, %d + %and = and <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %and +} + diff --git a/test/CodeGen/ARM/setcc-sentinals.ll b/test/CodeGen/ARM/setcc-sentinals.ll deleted file mode 100644 index dc45e0e13881d..0000000000000 --- a/test/CodeGen/ARM/setcc-sentinals.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -asm-verbose=false %s -o - | FileCheck %s - -define zeroext i1 @test0(i32 %x) nounwind { -; CHECK-LABEL: test0: -; CHECK: add [[REG:(r[0-9]+)|(lr)]], r0, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: cmp [[REG]], #1 -; CHECK-NEXT: movwhi r0, #1 -; CHECK-NEXT: bx lr - %cmp1 = icmp ne i32 %x, -1 - %not.cmp = icmp ne i32 %x, 0 - %.cmp1 = and i1 %cmp1, %not.cmp - ret i1 %.cmp1 -} diff --git a/test/CodeGen/ARM/single-issue-r52.mir b/test/CodeGen/ARM/single-issue-r52.mir new file mode 100644 index 0000000000000..6c95f7603e6e0 --- /dev/null +++ b/test/CodeGen/ARM/single-issue-r52.mir @@ -0,0 +1,86 @@ +# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN +# RUN: llc -o /dev/null %s 
-mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP +# REQUIRES: asserts +--- | + ; ModuleID = 'foo.ll' + source_filename = "foo.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "arm---eabi" + + %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } + ; Function Attrs: nounwind + define <8 x i8> @foo(i8* %A) { + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 1 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 + } + declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32) + +# CHECK: ********** MI Scheduling ********** +# CHECK: ScheduleDAGMILive::schedule starting +# CHECK: SU(1): %vreg1<def> = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0 +# CHECK: Latency : 8 +# CHECK: Single Issue : true; +# CHECK: SU(2): %vreg4<def> = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1 +# CHECK: Latency : 5 +# CHECK: Single Issue : false; +# CHECK: SU(3): %vreg5<def>, %vreg6<def> = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4 +# CHECK: Latency : 4 +# CHECK: Single Issue : false; + +# TOPDOWN: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo +# TOPDOWN: Bump cycle to end group +# TOPDOWN: Scheduling SU(2) %vreg4<def> = VADDv8i8 + +# BOTTOMUP: Scheduling SU(2) %vreg4<def> = VADDv8i8 +# BOTTOMUP: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo +# BOTTOMUP: Bump cycle to begin group + +... 
+--- +name: foo +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: qqpr } + - { id: 2, class: dpr } + - { id: 3, class: dpr } + - { id: 4, class: dpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + liveins: %r0 + + %0 = COPY %r0 + %1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8) + %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _ + %5, %6 = VMOVRRD %4, 14, _ + %r0 = COPY %5 + %r1 = COPY %6 + BX_RET 14, _, implicit %r0, implicit killed %r1 + +... diff --git a/test/CodeGen/ARM/sjljeh-swifterror.ll b/test/CodeGen/ARM/sjljeh-swifterror.ll new file mode 100644 index 0000000000000..aae0e75c98afb --- /dev/null +++ b/test/CodeGen/ARM/sjljeh-swifterror.ll @@ -0,0 +1,27 @@ +; RUN: opt -sjljehprepare -verify < %s | FileCheck %s +target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" +target triple = "armv7s-apple-ios7.0" + +%swift.error = type opaque + +declare void @objc_msgSend() local_unnamed_addr + +declare i32 @__objc_personality_v0(...) + +; Make sure we don't leave a select on a swifterror argument. 
+; CHECK-LABEL: @test +; CHECK-NOT: select true, %0 +define swiftcc void @test(%swift.error** swifterror) local_unnamed_addr personality i32 (...)* @__objc_personality_v0 { +entry: + %call28.i = invoke i32 bitcast (void ()* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont.i unwind label %lpad.i + +invoke.cont.i: + unreachable + +lpad.i: + %1 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef +} + diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll index aa093192f2b22..4788644cf1958 100644 --- a/test/CodeGen/ARM/smml.ll +++ b/test/CodeGen/ARM/smml.ll @@ -1,20 +1,15 @@ -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6 -; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7 -; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMB -; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMB -; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV6T2 -; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV7 +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V4 +; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6 +; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6 +; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMB +; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6 +; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6T2 +; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6T2 define i32 @Test0(i32 %a, i32 %b, i32 %c) nounwind readnone ssp { entry: ; CHECK-LABEL: Test0 ; CHECK-NOT:
smmls -; CHECK-V6-NOT: smmls -; CHECK-V7-NOT: smmls -; CHECK_THUMB-NOT: smmls -; CHECK-THUMBV6T2-NOT: smmls -; CHECK-THUMBV7-NOT: smmls %conv4 = zext i32 %a to i64 %conv1 = sext i32 %b to i64 %conv2 = sext i32 %c to i64 @@ -27,12 +22,11 @@ entry: define i32 @Test1(i32 %a, i32 %b, i32 %c) { ;CHECK-LABEL: Test1 -;CHECK-NOT: smmls +;CHECK-V4-NOT: smmls ;CHECK-THUMB-NOT: smmls +;CHECK-THUMBV6-NOT: smmls ;CHECK-V6: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 -;CHECK-V7: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 ;CHECK-THUMBV6T2: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 -;CHECK-THUMBV7: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 entry: %conv = sext i32 %b to i64 %conv1 = sext i32 %c to i64 @@ -47,10 +41,21 @@ entry: declare void @opaque(i32) define void @test_used_flags(i32 %in1, i32 %in2) { -; CHECK-V7-LABEL: test_used_flags: -; CHECK-V7: smull [[PROD_LO:r[0-9]+]], [[PROD_HI:r[0-9]+]], r0, r1 -; CHECK-V7: rsbs {{.*}}, [[PROD_LO]], #0 -; CHECK-V7: rscs {{.*}}, [[PROD_HI]], #0 +; CHECK-LABEL: test_used_flags: +; CHECK-THUMB: cmp r1, #0 +; CHECK-THUMB: push {r2} +; CHECK-THUMB: pop {r3} +; CHECK-THUMB: ble +; CHECK-THUMBV6: cmp r1, #0 +; CHECK-THUMBV6: mov r3, r2 +; CHECK-THUMBV6: ble +; CHECK-V6: smull [[PROD_LO:r[0-9]+]], [[PROD_HI:r[0-9]+]], r0, r1 +; CHECK-V6: rsbs {{.*}}, [[PROD_LO]], #0 +; CHECK-V6: rscs {{.*}}, [[PROD_HI]], #0 +; CHECK-THUMBV6T2: smull [[PROD_LO:r[0-9]+]], [[PROD_HI:r[0-9]+]], r0, r1 +; CHECK-THUMBV6T2: movs [[ZERO:r[0-9]+]], #0 +; CHECK-THUMBV6T2: rsbs {{.*}}, [[PROD_LO]], #0 +; CHECK-THUMBV6T2: sbcs.w {{.*}}, [[ZERO]], [[PROD_HI]] %in1.64 = sext i32 %in1 to i64 %in2.64 = sext i32 %in2 to i64 %mul = mul nsw i64 %in1.64, %in2.64 diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll index 3c187aa846d54..2b7be41ddb24e 100644 --- a/test/CodeGen/ARM/smul.ll +++ b/test/CodeGen/ARM/smul.ll @@ -262,3 +262,32 @@ define i32 @f21(i32 %a, i32 %x, i16 %y) { %tmp5 = add i32 %a, %tmp4 ret i32 %tmp5 } + +@global_b = external global i16, align 2 + 
+define i32 @f22(i32 %a) { +; CHECK-LABEL: f22: +; CHECK: smulwb r0, r0, r1 +; CHECK-THUMBV6-NOT: smulwb + %b = load i16, i16* @global_b, align 2 + %sext = sext i16 %b to i64 + %conv = sext i32 %a to i64 + %mul = mul nsw i64 %sext, %conv + %shr37 = lshr i64 %mul, 16 + %conv4 = trunc i64 %shr37 to i32 + ret i32 %conv4 +} + +define i32 @f23(i32 %a, i32 %c) { +; CHECK-LABEL: f23: +; CHECK: smlawb r0, r0, r2, r1 +; CHECK-THUMBV6-NOT: smlawb + %b = load i16, i16* @global_b, align 2 + %sext = sext i16 %b to i64 + %conv = sext i32 %a to i64 + %mul = mul nsw i64 %sext, %conv + %shr49 = lshr i64 %mul, 16 + %conv5 = trunc i64 %shr49 to i32 + %add = add nsw i32 %conv5, %c + ret i32 %add +} diff --git a/test/CodeGen/ARM/softfp-fabs-fneg.ll b/test/CodeGen/ARM/softfp-fabs-fneg.ll index b608fb840218a..b7c684d35b571 100644 --- a/test/CodeGen/ARM/softfp-fabs-fneg.ll +++ b/test/CodeGen/ARM/softfp-fabs-fneg.ll @@ -14,8 +14,7 @@ define double @f(double %a) { define float @g(float %a) { ; CHECK-LABEL: g: - ; CHECK-THUMB: bic r0, r0, #-2147483648 - ; CHECK-ARM: bfc r0, #31, #1 + ; CHECK: bic r0, r0, #-2147483648 ; CHECK-NEXT: bx lr %x = call float @llvm.fabs.f32(float %a) readnone ret float %x diff --git a/test/CodeGen/ARM/special-reg-mcore.ll b/test/CodeGen/ARM/special-reg-mcore.ll index 45e6db9e78fe1..1ecf8dc77a701 100644 --- a/test/CodeGen/ARM/special-reg-mcore.ll +++ b/test/CodeGen/ARM/special-reg-mcore.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 2>&1 | FileCheck %s --check-prefix=MCORE +; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 --show-mc-encoding 2>&1 | FileCheck %s --check-prefix=MCORE ; RUN: not llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m3 2>&1 | FileCheck %s --check-prefix=M3CORE ; RUN: not llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s --check-prefix=ACORE @@ -8,20 +8,20 @@ define i32 @read_mclass_registers() nounwind { entry: ; MCORE-LABEL: read_mclass_registers: - ; MCORE: mrs r0, apsr - ; MCORE: mrs 
r1, iapsr - ; MCORE: mrs r1, eapsr - ; MCORE: mrs r1, xpsr - ; MCORE: mrs r1, ipsr - ; MCORE: mrs r1, epsr - ; MCORE: mrs r1, iepsr - ; MCORE: mrs r1, msp - ; MCORE: mrs r1, psp - ; MCORE: mrs r1, primask - ; MCORE: mrs r1, basepri - ; MCORE: mrs r1, basepri_max - ; MCORE: mrs r1, faultmask - ; MCORE: mrs r1, control + ; MCORE: mrs r0, apsr @ encoding: [0xef,0xf3,0x00,0x80] + ; MCORE: mrs r1, iapsr @ encoding: [0xef,0xf3,0x01,0x81] + ; MCORE: mrs r1, eapsr @ encoding: [0xef,0xf3,0x02,0x81] + ; MCORE: mrs r1, xpsr @ encoding: [0xef,0xf3,0x03,0x81] + ; MCORE: mrs r1, ipsr @ encoding: [0xef,0xf3,0x05,0x81] + ; MCORE: mrs r1, epsr @ encoding: [0xef,0xf3,0x06,0x81] + ; MCORE: mrs r1, iepsr @ encoding: [0xef,0xf3,0x07,0x81] + ; MCORE: mrs r1, msp @ encoding: [0xef,0xf3,0x08,0x81] + ; MCORE: mrs r1, psp @ encoding: [0xef,0xf3,0x09,0x81] + ; MCORE: mrs r1, primask @ encoding: [0xef,0xf3,0x10,0x81] + ; MCORE: mrs r1, basepri @ encoding: [0xef,0xf3,0x11,0x81] + ; MCORE: mrs r1, basepri_max @ encoding: [0xef,0xf3,0x12,0x81] + ; MCORE: mrs r1, faultmask @ encoding: [0xef,0xf3,0x13,0x81] + ; MCORE: mrs r1, control @ encoding: [0xef,0xf3,0x14,0x81] %0 = call i32 @llvm.read_register.i32(metadata !0) %1 = call i32 @llvm.read_register.i32(metadata !4) @@ -56,32 +56,32 @@ entry: define void @write_mclass_registers(i32 %x) nounwind { entry: ; MCORE-LABEL: write_mclass_registers: - ; MCORE: msr apsr_nzcvqg, r0 - ; MCORE: msr apsr_nzcvq, r0 - ; MCORE: msr apsr_g, r0 - ; MCORE: msr apsr_nzcvqg, r0 - ; MCORE: msr iapsr_nzcvqg, r0 - ; MCORE: msr iapsr_nzcvq, r0 - ; MCORE: msr iapsr_g, r0 - ; MCORE: msr iapsr_nzcvqg, r0 - ; MCORE: msr eapsr_nzcvqg, r0 - ; MCORE: msr eapsr_nzcvq, r0 - ; MCORE: msr eapsr_g, r0 - ; MCORE: msr eapsr_nzcvqg, r0 - ; MCORE: msr xpsr_nzcvqg, r0 - ; MCORE: msr xpsr_nzcvq, r0 - ; MCORE: msr xpsr_g, r0 - ; MCORE: msr xpsr_nzcvqg, r0 - ; MCORE: msr ipsr, r0 - ; MCORE: msr epsr, r0 - ; MCORE: msr iepsr, r0 - ; MCORE: msr msp, r0 - ; MCORE: msr psp, r0 - ; MCORE: msr 
primask, r0 - ; MCORE: msr basepri, r0 - ; MCORE: msr basepri_max, r0 - ; MCORE: msr faultmask, r0 - ; MCORE: msr control, r0 + ; MCORE: msr apsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x00,0x88] + ; MCORE: msr apsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x00,0x88] + ; MCORE: msr apsr_g, r0 @ encoding: [0x80,0xf3,0x00,0x84] + ; MCORE: msr apsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x00,0x8c] + ; MCORE: msr iapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x01,0x88] + ; MCORE: msr iapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x01,0x88] + ; MCORE: msr iapsr_g, r0 @ encoding: [0x80,0xf3,0x01,0x84] + ; MCORE: msr iapsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x01,0x8c] + ; MCORE: msr eapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x02,0x88] + ; MCORE: msr eapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x02,0x88] + ; MCORE: msr eapsr_g, r0 @ encoding: [0x80,0xf3,0x02,0x84] + ; MCORE: msr eapsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x02,0x8c] + ; MCORE: msr xpsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x03,0x88] + ; MCORE: msr xpsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x03,0x88] + ; MCORE: msr xpsr_g, r0 @ encoding: [0x80,0xf3,0x03,0x84] + ; MCORE: msr xpsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x03,0x8c] + ; MCORE: msr ipsr, r0 @ encoding: [0x80,0xf3,0x05,0x88] + ; MCORE: msr epsr, r0 @ encoding: [0x80,0xf3,0x06,0x88] + ; MCORE: msr iepsr, r0 @ encoding: [0x80,0xf3,0x07,0x88] + ; MCORE: msr msp, r0 @ encoding: [0x80,0xf3,0x08,0x88] + ; MCORE: msr psp, r0 @ encoding: [0x80,0xf3,0x09,0x88] + ; MCORE: msr primask, r0 @ encoding: [0x80,0xf3,0x10,0x88] + ; MCORE: msr basepri, r0 @ encoding: [0x80,0xf3,0x11,0x88] + ; MCORE: msr basepri_max, r0 @ encoding: [0x80,0xf3,0x12,0x88] + ; MCORE: msr faultmask, r0 @ encoding: [0x80,0xf3,0x13,0x88] + ; MCORE: msr control, r0 @ encoding: [0x80,0xf3,0x14,0x88] call void @llvm.write_register.i32(metadata !0, i32 %x) call void @llvm.write_register.i32(metadata !1, i32 %x) diff --git a/test/CodeGen/ARM/special-reg-v8m-main.ll b/test/CodeGen/ARM/special-reg-v8m-main.ll index cde296c6b218f..ea9c01487d854 100644 
--- a/test/CodeGen/ARM/special-reg-v8m-main.ll +++ b/test/CodeGen/ARM/special-reg-v8m-main.ll @@ -90,19 +90,19 @@ entry: define void @write_mclass_registers(i32 %x) nounwind { entry: ; MAINLINE-LABEL: write_mclass_registers: - ; MAINLINE: msr apsr_nzcvqg, r0 + ; MAINLINE: msr apsr_nzcvq, r0 ; MAINLINE: msr apsr_nzcvq, r0 ; MAINLINE: msr apsr_g, r0 ; MAINLINE: msr apsr_nzcvqg, r0 - ; MAINLINE: msr iapsr_nzcvqg, r0 + ; MAINLINE: msr iapsr_nzcvq, r0 ; MAINLINE: msr iapsr_nzcvq, r0 ; MAINLINE: msr iapsr_g, r0 ; MAINLINE: msr iapsr_nzcvqg, r0 - ; MAINLINE: msr eapsr_nzcvqg, r0 + ; MAINLINE: msr eapsr_nzcvq, r0 ; MAINLINE: msr eapsr_nzcvq, r0 ; MAINLINE: msr eapsr_g, r0 ; MAINLINE: msr eapsr_nzcvqg, r0 - ; MAINLINE: msr xpsr_nzcvqg, r0 + ; MAINLINE: msr xpsr_nzcvq, r0 ; MAINLINE: msr xpsr_nzcvq, r0 ; MAINLINE: msr xpsr_g, r0 ; MAINLINE: msr xpsr_nzcvqg, r0 diff --git a/test/CodeGen/ARM/stack_guard_remat.ll b/test/CodeGen/ARM/stack_guard_remat.ll index 99d4994984506..9b5677608d266 100644 --- a/test/CodeGen/ARM/stack_guard_remat.ll +++ b/test/CodeGen/ARM/stack_guard_remat.ll @@ -51,20 +51,20 @@ define i32 @test_stack_guard_remat() #0 { %a1 = alloca [256 x i32], align 4 %1 = bitcast [256 x i32]* %a1 to i8* - call void @llvm.lifetime.start(i64 1024, i8* %1) + call void @llvm.lifetime.start.p0i8(i64 1024, i8* %1) %2 = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i32 0, i32 0 call void @foo3(i32* %2) #3 call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"() - call void @llvm.lifetime.end(i64 1024, i8* %1) + call void @llvm.lifetime.end.p0i8(i64 1024, i8* %1) ret i32 0 } ; Function Attrs: nounwind -declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @foo3(i32*) ; Function Attrs: nounwind -declare void @llvm.lifetime.end(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) attributes #0 = { 
nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/ARM/static-addr-hoisting.ll b/test/CodeGen/ARM/static-addr-hoisting.ll index 3d47e02f965e8..683d607936b85 100644 --- a/test/CodeGen/ARM/static-addr-hoisting.ll +++ b/test/CodeGen/ARM/static-addr-hoisting.ll @@ -6,9 +6,9 @@ define void @multiple_store() { ; CHECK: movs [[VAL:r[0-9]+]], #42 ; CHECK: movt r[[BASE1]], #15 -; CHECK: str [[VAL]], [r[[BASE1]]] -; CHECK: str [[VAL]], [r[[BASE1]], #24] -; CHECK: str.w [[VAL]], [r[[BASE1]], #42] +; CHECK-DAG: str [[VAL]], [r[[BASE1]]] +; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24] +; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42] ; CHECK: movw r[[BASE2:[0-9]+]], #20394 ; CHECK: movt r[[BASE2]], #18 diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll index 37e9a4af3be59..475b80b3bb070 100644 --- a/test/CodeGen/ARM/tail-opts.ll +++ b/test/CodeGen/ARM/tail-opts.ll @@ -65,3 +65,55 @@ altret: call void @far(i32 1001) ret void } + +; Use alternating abort functions so that the blocks we wish to merge are not +; layout successors during branch folding. 
+ +; CHECK-LABEL: merge_alternating_aborts: +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort +; CHECK: bxne lr +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort +; CHECK: LBB{{.*}}: +; CHECK: mov lr, pc +; CHECK: b _alt_abort +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort +; CHECK: LBB{{.*}}: +; CHECK: mov lr, pc +; CHECK: b _abort +; CHECK-NOT: _abort +; CHECK-NOT: _alt_abort + +declare void @abort() +declare void @alt_abort() + +define void @merge_alternating_aborts() { +entry: + %c1 = call i1 @qux() + br i1 %c1, label %cont1, label %abort1 +abort1: + call void @abort() + unreachable +cont1: + %c2 = call i1 @qux() + br i1 %c2, label %cont2, label %abort2 +abort2: + call void @alt_abort() + unreachable +cont2: + %c3 = call i1 @qux() + br i1 %c3, label %cont3, label %abort3 +abort3: + call void @abort() + unreachable +cont3: + %c4 = call i1 @qux() + br i1 %c4, label %cont4, label %abort4 +abort4: + call void @alt_abort() + unreachable +cont4: + ret void +} diff --git a/test/CodeGen/ARM/thumb1-div.ll b/test/CodeGen/ARM/thumb1-div.ll new file mode 100644 index 0000000000000..844dfe6f963c1 --- /dev/null +++ b/test/CodeGen/ARM/thumb1-div.ll @@ -0,0 +1,67 @@ +; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-m23 -march=thumb | \ +; RUN: FileCheck %s -check-prefix=CHECK + +define i32 @f1(i32 %a, i32 %b) { +entry: +; CHECK-LABEL: f1 + +; CHECK: sdiv + %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f2(i32 %a, i32 %b) { +entry: +; CHECK-LABEL: f2 +; CHECK: udiv + %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +entry: +; CHECK-LABEL: f3 + + + %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +; CHECK: sdiv +; CHECK-NEXT: muls +; CHECK-NEXT: subs +} + +define i32 @f4(i32 %a, i32 %b) { +entry: +; CHECK-LABEL: f4 + +; CHECK: udiv +; CHECK-NEXT: muls +; CHECK-NEXT: subs + %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + + +define i64 @f5(i64 %a, i64 %b) { +entry: +; CHECK-LABEL: f5 + 
+; EABI MODE = Remainder in R2-R3, quotient in R0-R1 +; CHECK: __aeabi_ldivmod +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 + %tmp1 = srem i64 %a, %b ; <i64> [#uses=1] + ret i64 %tmp1 +} + +define i64 @f6(i64 %a, i64 %b) { +entry: +; CHECK-LABEL: f6 + +; EABI MODE = Remainder in R2-R3, quotient in R0-R1 +; CHECK: __aeabi_uldivmod +; CHECK: mov r0, r2 +; CHECK: mov r1, r3 + %tmp1 = urem i64 %a, %b ; <i64> [#uses=1] + ret i64 %tmp1 +} diff --git a/test/CodeGen/ARM/unschedule-first-call.ll b/test/CodeGen/ARM/unschedule-first-call.ll new file mode 100644 index 0000000000000..4a218afcc5e13 --- /dev/null +++ b/test/CodeGen/ARM/unschedule-first-call.ll @@ -0,0 +1,136 @@ +; RUN: llc < %s +; PR30911 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv6kz--linux-gnueabihf" + +; Function Attrs: nounwind +define void @dradbg(i32, i32, float*, float*, float*, float*, float*) #0 { + br i1 undef, label %.critedge, label %8 + +.critedge: ; preds = %7 + %.mux2 = select i1 undef, i1 undef, i1 true + br label %8 + +; <label>:8: ; preds = %.critedge, %7 + %9 = getelementptr float, float* %3, i64 undef + %10 = ptrtoint float* %9 to i32 + %11 = icmp ule i32 %10, undef + %12 = getelementptr float, float* %5, i64 undef + %13 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) + %14 = extractvalue { i64, i1 } %13, 0 + %15 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %14, i64 1) + %16 = extractvalue { i64, i1 } %15, 0 + %17 = icmp slt i64 1, %16 + %18 = select i1 %17, i64 1, i64 %16 + %19 = sext i32 %1 to i64 + %20 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %18, i64 %19) + %21 = extractvalue { i64, i1 } %20, 0 + %22 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %21, i64 0) + %23 = extractvalue { i64, i1 } %22, 0 + %24 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %23, i64 undef) + %25 = extractvalue { i64, i1 } %24, 0 + %26 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %25, i64 0) + %27 = 
extractvalue { i64, i1 } %26, 0 + %28 = getelementptr float, float* %3, i64 %27 + %29 = ptrtoint float* %12 to i32 + %30 = ptrtoint float* %28 to i32 + %31 = icmp ule i32 %29, %30 + %32 = or i1 %11, %31 + %33 = and i1 false, %32 + %34 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 undef) + %35 = extractvalue { i64, i1 } %34, 0 + %36 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %35, i64 1) + %37 = extractvalue { i64, i1 } %36, 0 + %38 = icmp slt i64 1, %37 + %39 = select i1 %38, i64 1, i64 %37 + %40 = sext i32 %1 to i64 + %41 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %39, i64 %40) + %42 = extractvalue { i64, i1 } %41, 0 + %43 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %42, i64 0) + %44 = extractvalue { i64, i1 } %43, 0 + %45 = sext i32 %0 to i64 + %46 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %44, i64 %45) + %47 = extractvalue { i64, i1 } %46, 0 + %48 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %47, i64 0) + %49 = extractvalue { i64, i1 } %48, 0 + %50 = getelementptr float, float* %5, i64 %49 + %51 = ptrtoint float* %50 to i32 + %52 = icmp ule i32 undef, %51 + %53 = getelementptr float, float* %4, i64 undef + %54 = ptrtoint float* %53 to i32 + %55 = icmp ule i32 undef, %54 + %56 = or i1 %52, %55 + %57 = and i1 %33, %56 + %58 = getelementptr float, float* %2, i64 undef + %59 = ptrtoint float* %58 to i32 + %60 = icmp ule i32 %59, undef + %61 = select i1 undef, i64 undef, i64 0 + %62 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %61, i64 undef) + %63 = extractvalue { i64, i1 } %62, 0 + %64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 1) + %65 = extractvalue { i64, i1 } %64, 0 + %66 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %63, i64 %65) + %67 = extractvalue { i64, i1 } %66, 0 + %68 = sext i32 %0 to i64 + %69 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %67, i64 %68) + %70 = extractvalue { i64, i1 } %69, 0 + %71 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %70, i64 0) 
+ %72 = extractvalue { i64, i1 } %71, 0 + %73 = getelementptr float, float* %5, i64 %72 + %74 = ptrtoint float* %73 to i32 + %75 = icmp ule i32 %74, undef + %76 = or i1 %60, %75 + %77 = and i1 %57, %76 + %78 = getelementptr float, float* %6, i64 undef + %79 = ptrtoint float* %78 to i32 + %80 = icmp ule i32 %79, undef + %81 = getelementptr float, float* %5, i64 undef + %82 = ptrtoint float* %81 to i32 + %83 = icmp ule i32 %82, undef + %84 = or i1 %80, %83 + %85 = and i1 %77, %84 + %86 = and i1 %85, undef + %87 = and i1 %86, undef + %88 = and i1 %87, undef + %89 = and i1 %88, undef + %90 = and i1 %89, undef + %91 = and i1 %90, undef + %92 = and i1 %91, undef + %93 = and i1 %92, undef + %94 = and i1 %93, undef + %95 = and i1 %94, undef + br i1 %95, label %97, label %96 + +; <label>:96: ; preds = %8 + br i1 undef, label %.critedge122, label %.critedge110 + +.critedge122: ; preds = %.critedge122, %96 + br i1 false, label %.critedge122, label %.critedge110 + +.critedge110: ; preds = %.critedge219, %97, %.critedge122, %96 + ret void + +; <label>:97: ; preds = %8 + br i1 undef, label %.critedge219, label %.critedge110 + +.critedge219: ; preds = %.critedge219, %97 + %.pr287 = phi i1 [ undef, %.critedge219 ], [ true, %97 ] + br i1 %.pr287, label %.critedge219, label %.critedge110 +} + +; Function Attrs: nounwind readnone +declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) #1 + +; Function Attrs: nounwind readnone +declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #1 + +; Function Attrs: nounwind readnone +declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="arm1176jzf-s" 
"target-features"="+dsp,+strict-align,+vfp2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 4.0.0 (trunk 285923) (llvm/trunk 285921)"} diff --git a/test/CodeGen/ARM/v6-jumptable-clobber.mir b/test/CodeGen/ARM/v6-jumptable-clobber.mir new file mode 100644 index 0000000000000..0e9bc42565f3b --- /dev/null +++ b/test/CodeGen/ARM/v6-jumptable-clobber.mir @@ -0,0 +1,384 @@ +# RUN: llc -run-pass=arm-cp-islands -o - %s | FileCheck %s + +# Test created by tweaking the register allocation after stopping the IR below +# just before constant islands. We were forwarding the table index to the end of +# the block, even though the LEA clobbered it. + +# CHECK-LABEL: name: foo +# CHECK: tBR_JT + # This order is important. If the jump-table comes first then the + # transformation is valid because the LEA can be removed, see second test. +# CHECK: CONSTPOOL_ENTRY +# CHECK: JUMPTABLE_ADDRS + +# CHECK-LABEL: name: bar +# CHECK: tTBB_JT %pc, killed %r1 + +--- | + ; ModuleID = 'simple.ll' + source_filename = "simple.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-none--eabi" + + define void @foo(i8 %in, i32* %addr) { + store i32 12345678, i32* %addr + %1 = call i32 @llvm.arm.space(i32 980, i32 undef) + %2 = zext i8 %in to i32 + switch i32 %2, label %default [ + i32 0, label %d1 + i32 1, label %d2 + i32 3, label %d3 + i32 4, label %d4 + i32 5, label %d5 + i32 6, label %d6 + i32 7, label %d7 + i32 2, label %d8 + i32 8, label %d9 + i32 9, label %d10 + i32 19, label %d11 + i32 20, label %d12 + i32 21, label %d13 + i32 22, label %d14 + i32 24, label %d15 + i32 25, label %d16 + i32 26, label %d17 + ] + + default: ; preds = %0 + unreachable + + d1: ; preds = %0 + unreachable + + d2: ; preds = %0 + unreachable + + d3: ; preds = %0 + unreachable + + d4: ; preds = %0 + unreachable + + d5: ; preds = %0 + unreachable + + d6: ; preds = %0 + unreachable 
+ + d7: ; preds = %0 + unreachable + + d8: ; preds = %0 + unreachable + + d9: ; preds = %0 + unreachable + + d10: ; preds = %0 + unreachable + + d11: ; preds = %0 + unreachable + + d12: ; preds = %0 + unreachable + + d13: ; preds = %0 + unreachable + + d14: ; preds = %0 + unreachable + + d15: ; preds = %0 + unreachable + + d16: ; preds = %0 + unreachable + + d17: ; preds = %0 + unreachable + } + + define void @bar(i8 %in, i32* %addr) { + store i32 12345678, i32* %addr + %1 = zext i8 %in to i32 + switch i32 %1, label %default [ + i32 0, label %d1 + i32 1, label %d2 + i32 3, label %d3 + i32 4, label %d4 + i32 5, label %d5 + i32 6, label %d6 + i32 7, label %d7 + i32 2, label %d8 + i32 8, label %d9 + i32 9, label %d10 + i32 19, label %d11 + i32 20, label %d12 + i32 21, label %d13 + i32 22, label %d14 + i32 24, label %d15 + i32 25, label %d16 + i32 26, label %d17 + ] + + default: ; preds = %0 + unreachable + + d1: ; preds = %0 + unreachable + + d2: ; preds = %0 + unreachable + + d3: ; preds = %0 + unreachable + + d4: ; preds = %0 + unreachable + + d5: ; preds = %0 + unreachable + + d6: ; preds = %0 + unreachable + + d7: ; preds = %0 + unreachable + + d8: ; preds = %0 + unreachable + + d9: ; preds = %0 + unreachable + + d10: ; preds = %0 + unreachable + + d11: ; preds = %0 + unreachable + + d12: ; preds = %0 + unreachable + + d13: ; preds = %0 + unreachable + + d14: ; preds = %0 + unreachable + + d15: ; preds = %0 + unreachable + + d16: ; preds = %0 + unreachable + + d17: ; preds = %0 + unreachable + } + + ; Function Attrs: nounwind + declare i32 @llvm.arm.space(i32, i32) #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + +... 
+--- +name: foo +alignment: 1 +exposesReturnsTwice: false +noVRegs: true +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%r0' } + - { reg: '%r1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +constants: + - id: 0 + value: i32 12345678 + alignment: 4 +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.3.d2', '%bb.9.d8', '%bb.4.d3', '%bb.5.d4', + '%bb.6.d5', '%bb.7.d6', '%bb.8.d7', '%bb.10.d9', + '%bb.11.d10', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', + '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', + '%bb.2.d1', '%bb.2.d1', '%bb.12.d11', '%bb.13.d12', + '%bb.14.d13', '%bb.15.d14', '%bb.2.d1', '%bb.16.d15', + '%bb.17.d16', '%bb.18.d17' ] +body: | + bb.0 (%ir-block.0): + successors: %bb.2.d1(0x03c3c3c4), %bb.1(0x7c3c3c3c) + liveins: %r0, %r1 + + %r2 = tLDRpci %const.0, 14, _ + tSTRi killed %r2, killed %r1, 0, 14, _ :: (store 4 into %ir.addr) + dead %r1 = SPACE 980, undef %r0 + %r0 = tUXTB killed %r0, 14, _ + %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, _ + tCMPi8 %r1, 25, 14, _, implicit-def %cpsr + tBcc %bb.2.d1, 8, killed %cpsr + + bb.1 (%ir-block.0): + successors: %bb.3.d2(0x07c549d2), %bb.9.d8(0x07c549d2), %bb.4.d3(0x07c549d2), %bb.5.d4(0x07c549d2), %bb.6.d5(0x07c549d2), %bb.7.d6(0x07c549d2), %bb.8.d7(0x07c549d2), %bb.10.d9(0x07c549d2), %bb.11.d10(0x07c549d2), %bb.2.d1(0x03ab62db), %bb.12.d11(0x07c549d2), %bb.13.d12(0x07c549d2), %bb.14.d13(0x07c549d2), %bb.15.d14(0x07c549d2), %bb.16.d15(0x07c549d2), %bb.17.d16(0x07c549d2), %bb.18.d17(0x07c549d2) + liveins: %r1 + + %r0, dead %cpsr = tLSLri killed %r1, 2, 14, _ + %r1 = tLEApcrelJT %jump-table.0, 14, _ + %r0 = tLDRr killed %r0, killed %r1, 14, _ :: (load 4 from jump-table) + tBR_JTr 
killed %r0, %jump-table.0 + + bb.3.d2: + + bb.9.d8: + + bb.4.d3: + + bb.5.d4: + + bb.6.d5: + + bb.7.d6: + + bb.8.d7: + + bb.10.d9: + + bb.11.d10: + + bb.2.d1: + + bb.12.d11: + + bb.13.d12: + + bb.14.d13: + + bb.15.d14: + + bb.16.d15: + + bb.17.d16: + + bb.18.d17: + +... + +--- +name: bar +alignment: 1 +exposesReturnsTwice: false +noVRegs: true +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%r0' } + - { reg: '%r1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +constants: + - id: 0 + value: i32 12345678 + alignment: 4 +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.3.d2', '%bb.9.d8', '%bb.4.d3', '%bb.5.d4', + '%bb.6.d5', '%bb.7.d6', '%bb.8.d7', '%bb.10.d9', + '%bb.11.d10', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', + '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', + '%bb.2.d1', '%bb.2.d1', '%bb.12.d11', '%bb.13.d12', + '%bb.14.d13', '%bb.15.d14', '%bb.2.d1', '%bb.16.d15', + '%bb.17.d16', '%bb.18.d17' ] +body: | + bb.0 (%ir-block.0): + successors: %bb.2.d1(0x03c3c3c4), %bb.1(0x7c3c3c3c) + liveins: %r0, %r1 + + %r2 = tLDRpci %const.0, 14, _ + tSTRi killed %r2, killed %r1, 0, 14, _ :: (store 4 into %ir.addr) + %r0 = tUXTB killed %r0, 14, _ + %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, _ + tCMPi8 %r1, 25, 14, _, implicit-def %cpsr + tBcc %bb.2.d1, 8, killed %cpsr + + bb.1 (%ir-block.0): + successors: %bb.3.d2(0x07c549d2), %bb.9.d8(0x07c549d2), %bb.4.d3(0x07c549d2), %bb.5.d4(0x07c549d2), %bb.6.d5(0x07c549d2), %bb.7.d6(0x07c549d2), %bb.8.d7(0x07c549d2), %bb.10.d9(0x07c549d2), %bb.11.d10(0x07c549d2), %bb.2.d1(0x03ab62db), %bb.12.d11(0x07c549d2), %bb.13.d12(0x07c549d2), %bb.14.d13(0x07c549d2), %bb.15.d14(0x07c549d2), %bb.16.d15(0x07c549d2), 
%bb.17.d16(0x07c549d2), %bb.18.d17(0x07c549d2) + liveins: %r1 + + %r0, dead %cpsr = tLSLri killed %r1, 2, 14, _ + %r1 = tLEApcrelJT %jump-table.0, 14, _ + %r0 = tLDRr killed %r0, killed %r1, 14, _ :: (load 4 from jump-table) + tBR_JTr killed %r0, %jump-table.0 + + bb.3.d2: + + bb.9.d8: + + bb.4.d3: + + bb.5.d4: + + bb.6.d5: + + bb.7.d6: + + bb.8.d7: + + bb.10.d9: + + bb.11.d10: + + bb.2.d1: + + bb.12.d11: + + bb.13.d12: + + bb.14.d13: + + bb.15.d14: + + bb.16.d15: + + bb.17.d16: + + bb.18.d17: + +... diff --git a/test/CodeGen/ARM/v8m-tail-call.ll b/test/CodeGen/ARM/v8m-tail-call.ll new file mode 100644 index 0000000000000..2c2c795838ff4 --- /dev/null +++ b/test/CodeGen/ARM/v8m-tail-call.ll @@ -0,0 +1,23 @@ +; RUN: llc %s -o - -mtriple=thumbv8m.base | FileCheck %s + +define void @test() { +; CHECK-LABEL: test: +entry: + %call = tail call i32 @foo() + %tail = tail call i32 @foo() + ret void +; CHECK: bl foo +; CHECK: bl foo +; CHECK-NOT: b foo +} + +define void @test2() { +; CHECK-LABEL: test2: +entry: + %tail = tail call i32 @foo() + ret void +; CHECK: b foo +; CHECK-NOT: bl foo +} + +declare i32 @foo() diff --git a/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll b/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll new file mode 100644 index 0000000000000..673e04687a10e --- /dev/null +++ b/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll @@ -0,0 +1,51 @@ +; RUN: llc -filetype=obj -o /dev/null < %s +; RUN: llc -filetype=asm < %s | FileCheck %s + +; ModuleID = 'bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-39ed676.bc" +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.base-arm-none-eabi" + +@crc32_tab = external unnamed_addr global [256 x i32], align 4 +@g_566 = external global i32**, align 4 + +define void @main() { +entry: + %0 = load volatile i32**, i32*** @g_566, align 4 + br label %func_16.exit.i.i.i + +lbl_1394.i.i.i.loopexit: ; preds = %for.cond14.preheader.us.i.i.i + unreachable + 
+func_16.exit.i.i.i: ; preds = %entry + br i1 undef, label %for.cond7.preheader.i.lr.ph.i.i, label %for.end476.i.i.i.loopexit + +for.cond7.preheader.i.lr.ph.i.i: ; preds = %func_16.exit.i.i.i + br i1 undef, label %for.end476.i.i.i.loopexit, label %for.cond7.preheader.i.i.preheader.i + +for.cond7.preheader.i.i.preheader.i: ; preds = %for.cond7.preheader.i.lr.ph.i.i + br label %for.cond14.preheader.us.i.i.i + +for.cond7.preheader.i.us.i.i: ; preds = %for.cond7.preheader.i.lr.ph.i.i + unreachable + +for.cond14.preheader.us.i.i.i: ; preds = %for.inc459.us.i.i.i, %for.cond7.preheader.i.i.preheader.i +; CHECK: @ BB#4 +; CHECK-NEXT: .p2align 2 + switch i4 undef, label %func_1.exit.loopexit [ + i4 0, label %for.inc459.us.i.i.i + i4 -5, label %for.inc459.us.i.i.i + i4 2, label %lbl_1394.i.i.i.loopexit + i4 3, label %for.end476.i.i.i.loopexit + ] + +for.inc459.us.i.i.i: ; preds = %for.cond14.preheader.us.i.i.i, %for.cond14.preheader.us.i.i.i + br label %for.cond14.preheader.us.i.i.i + +for.end476.i.i.i.loopexit: ; preds = %for.cond14.preheader.us.i.i.i + unreachable + +func_1.exit.loopexit: ; preds = %for.cond14.preheader.us.i.i.i + %arrayidx.i63.i.i5252 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32_tab, i32 0, i32 undef + unreachable +} diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll index d901a7461fc86..57470694b124b 100644 --- a/test/CodeGen/ARM/va_arg.ll +++ b/test/CodeGen/ARM/va_arg.ll @@ -4,8 +4,8 @@ ; CHECK-LABEL: test1: ; CHECK-NOT: bfc ; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 -; CHECK: bfc [[REG]], #0, #3 -; CHECK-NOT: bfc +; CHECK: bic {{(r[0-9]+)|(lr)}}, [[REG]], #7 +; CHECK-NOT: bic define i64 @test1(i32 %i, ...) 
nounwind optsize { entry: @@ -20,8 +20,8 @@ entry: ; CHECK-LABEL: test2: ; CHECK-NOT: bfc ; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 -; CHECK: bfc [[REG]], #0, #3 -; CHECK-NOT: bfc +; CHECK: bic {{(r[0-9]+)|(lr)}}, [[REG]], #7 +; CHECK-NOT: bic ; CHECK: bx lr define double @test2(i32 %a, i32* %b, ...) nounwind optsize { diff --git a/test/CodeGen/ARM/vcmp-crash.ll b/test/CodeGen/ARM/vcmp-crash.ll new file mode 100644 index 0000000000000..2d3262be5849b --- /dev/null +++ b/test/CodeGen/ARM/vcmp-crash.ll @@ -0,0 +1,11 @@ +; RUN: llc -mcpu=cortex-m4 < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7em-none--eabi" + +; CHECK: vcmp.f32 +define double @f(double %a, double %b, double %c, float %d) { + %1 = fcmp oeq float %d, 0.0 + %2 = select i1 %1, double %a, double %c + ret double %2 +} diff --git a/test/CodeGen/ARM/vldm-liveness.ll b/test/CodeGen/ARM/vldm-liveness.ll index e114e6970a324..63dc9d61ebcca 100644 --- a/test/CodeGen/ARM/vldm-liveness.ll +++ b/test/CodeGen/ARM/vldm-liveness.ll @@ -1,26 +1,13 @@ ; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s -; ARM load store optimizer was dealing with a sequence like: -; s1 = VLDRS [r0, 1], Q0<imp-def> -; s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def> -; s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def> -; s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def> +; Make sure we emit the loads in ascending order, and form a vldmia. ; -; It decided to combine the {s0, s1} loads into a single instruction in the -; third position. However, this leaves the instruction defining s3 with a stray -; imp-use of Q0, which is undefined. -; -; The verifier catches this, so this test just makes sure that appropriate -; liveness flags are added. -; -; I believe the change will be tested as long as the vldmia is not the first of -; the loads. 
Earlier optimisations may perturb the output over time, but -; fiddling the indices should be sufficient to restore the test. +; See vldm-liveness.mir for the bug this file was originally testing. define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) { ; CHECK-LABEL: foo: -; CHECK: vldr s3, [r0, #8] ; CHECK: vldmia r0, {s0, s1} +; CHECK: vldr s3, [r0, #8] ; CHECK: vldr s2, [r0, #16] %off0 = getelementptr float, float* %ptr, i32 0 %val0 = load float, float* %off0 diff --git a/test/CodeGen/ARM/vldm-liveness.mir b/test/CodeGen/ARM/vldm-liveness.mir new file mode 100644 index 0000000000000..a85a018a8b1a5 --- /dev/null +++ b/test/CodeGen/ARM/vldm-liveness.mir @@ -0,0 +1,40 @@ +# RUN: llc -run-pass arm-ldst-opt -verify-machineinstrs %s -o - | FileCheck %s +# ARM load store optimizer was dealing with a sequence like: +# s1 = VLDRS [r0, 1], Q0<imp-def> +# s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def> +# s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def> +# s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def> +# +# It decided to combine the {s0, s1} loads into a single instruction in the +# third position. However, this leaves the instruction defining s3 with a stray +# imp-use of Q0, which is undefined. +# +# The verifier catches this, so this test just makes sure that appropriate +# liveness flags are added. +--- | + target triple = "thumbv7-apple-ios" + define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) { + ret <4 x float> undef + } +... 
+--- +name: foo +alignment: 1 +liveins: + - { reg: '%r0' } +body: | + bb.0 (%ir-block.0): + liveins: %r0 + + %s1 = VLDRS %r0, 1, 14, _, implicit-def %q0 :: (load 4) + %s3 = VLDRS %r0, 2, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) + ; CHECK: %s3 = VLDRS %r0, 2, 14, _, implicit killed undef %q0, implicit-def %q0 :: (load 4) + + %s0 = VLDRS %r0, 0, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) + ; CHECK: VLDMSIA %r0, 14, _, def %s0, def %s1, implicit-def _ + + %s2 = VLDRS killed %r0, 4, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) + ; CHECK: %s2 = VLDRS killed %r0, 4, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) + + tBX_RET 14, _, implicit %q0 +... diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll index 746b1b000ef10..daea41399b47c 100644 --- a/test/CodeGen/ARM/vsel.ll +++ b/test/CodeGen/ARM/vsel.ll @@ -132,7 +132,7 @@ define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp oeq float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vseleq.f32 s0, s2, s3 ret void } @@ -141,7 +141,7 @@ define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp oeq float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vseleq.f64 d16, d1, d2 ret void } @@ -276,7 +276,7 @@ define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp une float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vseleq.f32 s0, s3, s2 ret void } @@ -285,7 +285,7 @@ define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp une float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b 
store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vseleq.f64 d16, d2, d1 ret void } diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index a83a4df5490c3..0a5235df319fe 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -318,33 +318,29 @@ entry: ret void } -define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) { +define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) { ; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8. ; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to -; truncate from i32 to i16 and one vuzp to perform the final truncation for i8. -; CHECK-LABEL: vuzp_trunc: +; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8. +; CHECK-LABEL: cmpsel_trunc: ; CHECK: @ BB#0: ; CHECK-NEXT: .save {r4, r5, r11, lr} ; CHECK-NEXT: push {r4, r5, r11, lr} -; CHECK-NEXT: add r12, sp, #48 -; CHECK-NEXT: add lr, sp, #16 ; CHECK-NEXT: add r4, sp, #64 ; CHECK-NEXT: add r5, sp, #32 +; CHECK-NEXT: add r12, sp, #48 +; CHECK-NEXT: add lr, sp, #16 ; CHECK-NEXT: vld1.64 {d16, d17}, [r5] ; CHECK-NEXT: vld1.64 {d18, d19}, [r4] ; CHECK-NEXT: vld1.64 {d20, d21}, [lr] ; CHECK-NEXT: vld1.64 {d22, d23}, [r12] ; CHECK-NEXT: vcgt.u32 q8, q9, q8 ; CHECK-NEXT: vcgt.u32 q9, q11, q10 -; CHECK-NEXT: vmovn.i32 d16, q8 -; CHECK-NEXT: vmovn.i32 d17, q9 -; CHECK-NEXT: vmov.i8 d18, #0x7 -; CHECK-NEXT: vmov d19, r0, r1 -; CHECK-NEXT: vuzp.8 d17, d16 -; CHECK-NEXT: vneg.s8 d16, d18 -; CHECK-NEXT: vshl.i8 d17, d17, #7 +; CHECK-NEXT: vmovn.i32 d17, q8 +; CHECK-NEXT: vmovn.i32 d16, q9 ; CHECK-NEXT: vmov d18, r2, r3 -; CHECK-NEXT: vshl.s8 d16, d17, d16 +; CHECK-NEXT: vmov d19, r0, r1 +; CHECK-NEXT: vmovn.i16 d16, q8 ; CHECK-NEXT: vbsl d16, d19, d18 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: pop {r4, r5, 
r11, lr} |