diff options
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r-- | test/CodeGen/ARM/2012-08-30-select.ll | 7 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir | 373 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll | 7 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-isel.ll | 41 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 84 | ||||
-rw-r--r-- | test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir | 30 | ||||
-rw-r--r-- | test/CodeGen/ARM/alloca-align.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/constantfp.ll | 50 | ||||
-rw-r--r-- | test/CodeGen/ARM/execute-only-big-stack-frame.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/fpoffset_overflow.mir | 12 | ||||
-rw-r--r-- | test/CodeGen/ARM/misched-fusion-aes.ll | 203 | ||||
-rw-r--r-- | test/CodeGen/ARM/v6m-umul-with-overflow.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/ARM/vector-promotion.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll | 9 |
15 files changed, 825 insertions, 26 deletions
diff --git a/test/CodeGen/ARM/2012-08-30-select.ll b/test/CodeGen/ARM/2012-08-30-select.ll index dbedad2637b73..97b732beb4d79 100644 --- a/test/CodeGen/ARM/2012-08-30-select.ll +++ b/test/CodeGen/ARM/2012-08-30-select.ll @@ -2,10 +2,9 @@ ; rdar://12201387 ;CHECK-LABEL: select_s_v_v: -;CHECK: itee ne -;CHECK-NEXT: vmovne.i32 -;CHECK-NEXT: vmoveq -;CHECK-NEXT: vmoveq +;CHECK: vmov +;CHECK-NEXT: vmov +;CHECK: vmov.i32 ;CHECK: bx define <16 x i8> @select_s_v_v(<16 x i8> %vec, i32 %avail) { entry: diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir new file mode 100644 index 0000000000000..111375ece51ba --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir @@ -0,0 +1,373 @@ +# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + define void @test_icmp_eq_s32() { ret void } + define void @test_icmp_ne_s32() { ret void } + define void @test_icmp_ugt_s32() { ret void } + define void @test_icmp_uge_s32() { ret void } + define void @test_icmp_ult_s32() { ret void } + define void @test_icmp_ule_s32() { ret void } + define void @test_icmp_sgt_s32() { ret void } + define void @test_icmp_sge_s32() { ret void } + define void @test_icmp_slt_s32() { ret void } + define void @test_icmp_sle_s32() { ret void } +... +--- +name: test_icmp_eq_s32 +# CHECK-LABEL: name: test_icmp_eq_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(eq), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 0, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_ne_s32 +# CHECK-LABEL: name: test_icmp_ne_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(ne), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 1, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_ugt_s32 +# CHECK-LABEL: name: test_icmp_ugt_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(ugt), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 8, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_uge_s32 +# CHECK-LABEL: name: test_icmp_uge_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(uge), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 2, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_ult_s32 +# CHECK-LABEL: name: test_icmp_ult_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(ult), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 3, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_ule_s32 +# CHECK-LABEL: name: test_icmp_ule_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(ule), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 9, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_sgt_s32 +# CHECK-LABEL: name: test_icmp_sgt_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(sgt), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 12, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_sge_s32 +# CHECK-LABEL: name: test_icmp_sge_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(sge), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 10, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_slt_s32 +# CHECK-LABEL: name: test_icmp_slt_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(slt), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 11, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_sle_s32 +# CHECK-LABEL: name: test_icmp_sle_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s1) = G_ICMP intpred(sle), %0(s32), %1 + ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr + ; CHECK: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 13, %cpsr + + %3(s32) = G_ZEXT %2(s1) + ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ + + %r0 = COPY %3(s32) + ; CHECK: %r0 = COPY [[RET]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index 6663a9210b870..0ff8d52e94c62 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -1009,8 +1009,11 @@ define i32 @test_constantstruct_v2s32_s32_s32() { ; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) ; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 ; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[CS:%[0-9]+]](s128) = G_SEQUENCE [[VEC]](<2 x s32>), 0, [[C3]](s32), 64, [[C4]](s32), 96 -; CHECK: [[EXT:%[0-9]+]](<2 x s32>) = G_EXTRACT [[CS]](s128), 0 +; CHECK: [[C5:%[0-9]+]](s128) = IMPLICIT_DEF +; CHECK: [[C6:%[0-9]+]](s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0 +; CHECK: [[C7:%[0-9]+]](s128) = G_INSERT [[C6]], [[C3]](s32), 64 +; CHECK: [[C8:%[0-9]+]](s128) = G_INSERT [[C7]], [[C4]](s32), 96 +; CHECK: [[EXT:%[0-9]+]](<2 x s32>) = G_EXTRACT [[C8]](s128), 0 ; CHECK: G_EXTRACT_VECTOR_ELT [[EXT]](<2 x s32>) %vec = extractvalue %struct.v2s32.s32.s32 {<2 x i32><i32 1, i32 2>, i32 3, i32 4}, 0 %elt = extractelement <2 x i32> %vec, i32 0 diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 6ddc29a3bbbae..76fb39ecea013 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -359,3 +359,44 @@ entry: %v = fadd double %f0, %f1 ret double %v } + +define arm_aapcscc i32 @test_cmp_i32_eq(i32 %a, i32 %b) { +; CHECK-LABEL: test_cmp_i32_eq: +; CHECK: mov [[V:r[0-9]+]], #0 +; CHECK: cmp r0, r1 +; CHECK: moveq [[V]], #1 +; CHECK: and r0, [[V]], #1 +; CHECK: bx lr +entry: + %v = icmp eq i32 %a, %b + %r = zext i1 %v to i32 + ret i32 %r +} + +define arm_aapcscc i32 @test_cmp_ptr_neq(double *%a, double *%b) { +; CHECK-LABEL: test_cmp_ptr_neq: +; CHECK: mov [[V:r[0-9]+]], #0 +; CHECK: cmp r0, r1 +; CHECK: movne [[V]], #1 +; CHECK: and r0, [[V]], #1 +; CHECK: bx lr +entry: + %v = icmp ne double * %a, %b + %r = zext i1 %v to i32 + ret i32 %r +} + +define arm_aapcscc i32 @test_cmp_i16_slt(i16 %a, i16 %b) { +; CHECK-LABEL: test_cmp_i16_slt: +; CHECK-DAG: sxth r0, r0 +; CHECK-DAG: sxth r1, r1 +; CHECK-DAG: mov [[V:r[0-9]+]], #0 +; CHECK: cmp r0, r1 +; CHECK: movlt [[V]], #1 +; CHECK: and r0, [[V]], #1 +; CHECK: bx lr +entry: + %v = icmp slt i16 %a, %b + %r = zext i1 %v to i32 + ret i32 %r +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index c6f6ca81c2795..2def31eb15929 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -35,6 +35,10 @@ define void @test_constants() { ret void } + define void @test_icmp_s8() { ret void } + define void @test_icmp_s16() { ret void } + define void @test_icmp_s32() { ret void } + define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } @@ -691,6 +695,86 @@ body: | BX_RET 14, _, implicit %r0 ... --- +name: test_icmp_s8 +# CHECK-LABEL: name: test_icmp_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s1) = G_ICMP intpred(ne), %0(s8), %1 + ; G_ICMP with s8 should widen + ; CHECK: {{%[0-9]+}}(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s32), {{%[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s8), {{%[0-9]+}} + %3(s32) = G_ZEXT %2(s1) + %r0 = COPY %3(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_s16 +# CHECK-LABEL: name: test_icmp_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s1) = G_ICMP intpred(slt), %0(s16), %1 + ; G_ICMP with s16 should widen + ; CHECK: {{%[0-9]+}}(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s32), {{%[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s16), {{%[0-9]+}} + %3(s32) = G_ZEXT %2(s1) + %r0 = COPY %3(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_icmp_s32 +# CHECK-LABEL: name: test_icmp_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s1) = G_ICMP intpred(eq), %0(s32), %1 + ; G_ICMP with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), {{%[0-9]+}} + %3(s32) = G_ZEXT %2(s1) + %r0 = COPY %3(s32) + BX_RET 14, _, implicit %r0 +... +--- name: test_fadd_s32 # CHECK-LABEL: name: test_fadd_s32 legalized: false diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index cc1df80c60191..d97dd60bac223 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -34,6 +34,8 @@ define void @test_trunc_s32_16() { ret void } + define void @test_icmp_eq_s32() { ret void } + define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } @@ -711,6 +713,34 @@ body: | BX_RET 14, _, implicit %r0 ... --- +name: test_icmp_eq_s32 +# CHECK-LABEL: name: test_icmp_eq_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s1) = G_ICMP intpred(eq), %0(s32), %1 + %3(s32) = G_ZEXT %2(s1) + %r0 = COPY %3(s32) + BX_RET 14, _, implicit %r0 + +... +--- name: test_fadd_s32 # CHECK-LABEL: name: test_fadd_s32 legalized: true diff --git a/test/CodeGen/ARM/alloca-align.ll b/test/CodeGen/ARM/alloca-align.ll index 3bba156f0ee06..6186d137ef7fd 100644 --- a/test/CodeGen/ARM/alloca-align.ll +++ b/test/CodeGen/ARM/alloca-align.ll @@ -12,7 +12,7 @@ declare void @bar(i32*, [20000 x i8]* byval) ; And a base pointer getting used. ; CHECK: mov r6, sp ; Which is passed to the call -; CHECK: add [[REG:r[0-9]+]], r6, #19456 +; CHECK: add [[REG:r[0-9]+|lr]], r6, #19456 ; CHECK: add r0, [[REG]], #536 ; CHECK: bl bar define void @foo([20000 x i8]* %addr) { diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll index b5aeadc05eba4..0b431f47f50bf 100644 --- a/test/CodeGen/ARM/constantfp.ll +++ b/test/CodeGen/ARM/constantfp.ll @@ -11,6 +11,9 @@ ; RUN: llc -mtriple=thumbv7meb -arm-execute-only -mcpu=cortex-m4 %s -o - \ ; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE-BE %s +; RUN: llc -mtriple=thumbv7m -arm-execute-only -mcpu=cortex-m4 -relocation-model=ropi %s -o - \ +; RUN: | FileCheck --check-prefix=CHECK-XO-ROPI %s + ; RUN: llc -mtriple=thumbv8m.main -mattr=fp-armv8 %s -o - \ ; RUN: | FileCheck --check-prefix=CHECK-NO-XO %s @@ -20,6 +23,8 @@ ; RUN: llc -mtriple=thumbv8m.maineb -arm-execute-only -mattr=fp-armv8 %s -o - \ ; RUN: | FileCheck --check-prefix=CHECK-XO-FLOAT --check-prefix=CHECK-XO-DOUBLE-BE %s +; RUN: llc -mtriple=thumbv8m.main -arm-execute-only -mattr=fp-armv8 -relocation-model=ropi %s -o - \ +; RUN: | FileCheck --check-prefix=CHECK-XO-ROPI %s define arm_aapcs_vfpcc float @test_vmov_f32() { ; CHECK-LABEL: test_vmov_f32: @@ -176,3 +181,48 @@ define arm_aapcs_vfpcc double @lower_const_f64_xo() { ; CHECK-XO-DOUBLE-BE-NOT: vldr ret double 3.140000e-01 } + +; This is a target independent optimization, performed by the +; DAG Combiner, which promotes floating point literals into +; constant pools: +; +; (a cond b) ? 1.0f : 2.0f -> load (ConstPoolAddr + ((a cond b) ? 0 : 4) +; +; We need to make sure that the constant pools are placed in +; the data section when generating execute-only code: + +define arm_aapcs_vfpcc float @lower_fpconst_select(float %f) { + +; CHECK-NO-XO-LABEL: lower_fpconst_select +; CHECK-NO-XO: adr [[REG:r[0-9]+]], [[LABEL:.?LCPI[0-9]+_[0-9]+]] +; CHECK-NO-XO: vldr {{s[0-9]+}}, {{[[]}}[[REG]]{{[]]}} +; CHECK-NO-XO-NOT: .rodata +; CHECK-NO-XO: [[LABEL]]: +; CHECK-NO-XO: .long 1335165689 +; CHECK-NO-XO: .long 1307470632 + +; CHECK-XO-FLOAT-LABEL: lower_fpconst_select +; CHECK-XO-FLOAT: movw [[REG:r[0-9]+]], :lower16:[[LABEL:.?LCP[0-9]+_[0-9]+]] +; CHECK-XO-FLOAT: movt [[REG]], :upper16:[[LABEL]] +; CHECK-XO-FLOAT: vldr {{s[0-9]+}}, {{[[]}}[[REG]]{{[]]}} +; CHECK-XO-FLOAT: .rodata +; CHECK-XO-FLOAT-NOT: .text +; CHECK-XO-FLOAT: [[LABEL]]: +; CHECK-XO-FLOAT: .long 1335165689 +; CHECK-XO-FLOAT: .long 1307470632 + +; CHECK-XO-ROPI-LABEL: lower_fpconst_select +; CHECK-XO-ROPI: movw [[REG:r[0-9]+]], :lower16:([[LABEL1:.?LCP[0-9]+_[0-9]+]]-([[LABEL2:.?LPC[0-9]+_[0-9]+]]+4)) +; CHECK-XO-ROPI: movt [[REG]], :upper16:([[LABEL1]]-([[LABEL2]]+4)) +; CHECK-XO-ROPI: [[LABEL2]]: +; CHECK-XO-ROPI: vldr {{s[0-9]+}}, {{[[]}}[[REG]]{{[]]}} +; CHECK-XO-ROPI: .rodata +; CHECK-XO-ROPI-NOT: .text +; CHECK-XO-ROPI: [[LABEL1]]: +; CHECK-XO-ROPI: .long 1335165689 +; CHECK-XO-ROPI: .long 1307470632 + + %cmp = fcmp nnan oeq float %f, 0.000000e+00 + %sel = select i1 %cmp, float 5.000000e+08, float 5.000000e+09 + ret float %sel +} diff --git a/test/CodeGen/ARM/execute-only-big-stack-frame.ll b/test/CodeGen/ARM/execute-only-big-stack-frame.ll index fb498a81e390a..0fe67f9863a58 100644 --- a/test/CodeGen/ARM/execute-only-big-stack-frame.ll +++ b/test/CodeGen/ARM/execute-only-big-stack-frame.ll @@ -10,10 +10,10 @@ define i8 @test_big_stack_frame() { ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-SUBW-ADDW: sub.w sp, sp, #65536 ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} -; CHECK-SUBW-ADDW: add.w [[REG1:r[0-9]+]], sp, #255 +; CHECK-SUBW-ADDW: add.w [[REG1:r[0-9]+|lr]], sp, #255 ; CHECK-SUBW-ADDW: add.w {{r[0-9]+}}, [[REG1]], #65280 ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} -; CHECK-SUBW-ADDW: add.w lr, sp, #61440 +; CHECK-SUBW-ADDW: add.w [[REGX:r[0-9]+|lr]], sp, #61440 ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-SUBW-ADDW: add.w sp, sp, #65536 diff --git a/test/CodeGen/ARM/fpoffset_overflow.mir b/test/CodeGen/ARM/fpoffset_overflow.mir index 9c6cd931b1532..4f3524bf7d117 100644 --- a/test/CodeGen/ARM/fpoffset_overflow.mir +++ b/test/CodeGen/ARM/fpoffset_overflow.mir @@ -3,10 +3,10 @@ # This should trigger an emergency spill in the register scavenger because the # frame offset into the large argument is too large. # CHECK-LABEL: name: func0 -# CHECK: t2STRi12 killed %r7, %sp, 0, 14, _ :: (store 4 into %stack.0) -# CHECK: %r7 = t2ADDri killed %sp, 4096, 14, _, _ -# CHECK: %r11 = t2LDRi12 killed %r7, 36, 14, _ :: (load 4) -# CHECK: %r7 = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0) +# CHECK: t2STRi12 killed [[SPILLED:%r[0-9]+]], %sp, 0, 14, _ :: (store 4 into %stack.0) +# CHECK: [[SPILLED]] = t2ADDri killed %sp, 4096, 14, _, _ +# CHECK: %sp = t2LDRi12 killed [[SPILLED]], 40, 14, _ :: (load 4) +# CHECK: [[SPILLED]] = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0) name: func0 tracksRegLiveness: true fixedStack: @@ -23,6 +23,7 @@ body: | %r4 = IMPLICIT_DEF %r5 = IMPLICIT_DEF %r6 = IMPLICIT_DEF + %r7 = IMPLICIT_DEF %r8 = IMPLICIT_DEF %r9 = IMPLICIT_DEF %r10 = IMPLICIT_DEF @@ -30,7 +31,7 @@ body: | %r12 = IMPLICIT_DEF %lr = IMPLICIT_DEF - %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + %sp = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) KILL %r0 KILL %r1 @@ -39,6 +40,7 @@ body: | KILL %r4 KILL %r5 KILL %r6 + KILL %r7 KILL %r8 KILL %r9 KILL %r10 diff --git a/test/CodeGen/ARM/misched-fusion-aes.ll b/test/CodeGen/ARM/misched-fusion-aes.ll new file mode 100644 index 0000000000000..d3558ab4abb07 --- /dev/null +++ b/test/CodeGen/ARM/misched-fusion-aes.ll @@ -0,0 +1,203 @@ +; RUN: llc %s -o - -mtriple=armv8 -mattr=+crypto,+fuse-aes -enable-misched -disable-post-ra | FileCheck %s + +declare <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d, <16 x i8> %k) +declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %d) +declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %d, <16 x i8> %k) +declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %d) + +define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) { + %d0 = load <16 x i8>, <16 x i8>* %a0 + %a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1 + %d1 = load <16 x i8>, <16 x i8>* %a1 + %a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2 + %d2 = load <16 x i8>, <16 x i8>* %a2 + %a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3 + %d3 = load <16 x i8>, <16 x i8>* %a3 + %k0 = load <16 x i8>, <16 x i8>* %b0 + %e00 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d0, <16 x i8> %k0) + %f00 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e00) + %e01 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d1, <16 x i8> %k0) + %f01 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e01) + %e02 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d2, <16 x i8> %k0) + %f02 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e02) + %e03 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %d3, <16 x i8> %k0) + %f03 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e03) + %b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1 + %k1 = load <16 x i8>, <16 x i8>* %b1 + %e10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f00, <16 x i8> %k1) + %f10 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e00) + %e11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f01, <16 x i8> %k1) + %f11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e01) + %e12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f02, <16 x i8> %k1) + %f12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e02) + %e13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f03, <16 x i8> %k1) + %f13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e03) + %b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2 + %k2 = load <16 x i8>, <16 x i8>* %b2 + %e20 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f10, <16 x i8> %k2) + %f20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e10) + %e21 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f11, <16 x i8> %k2) + %f21 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e11) + %e22 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f12, <16 x i8> %k2) + %f22 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e12) + %e23 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f13, <16 x i8> %k2) + %f23 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e13) + %b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3 + %k3 = load <16 x i8>, <16 x i8>* %b3 + %e30 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f20, <16 x i8> %k3) + %f30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e20) + %e31 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f21, <16 x i8> %k3) + %f31 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e21) + %e32 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f22, <16 x i8> %k3) + %f32 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e22) + %e33 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f23, <16 x i8> %k3) + %f33 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %e23) + %g0 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f30, <16 x i8> %d) + %h0 = xor <16 x i8> %g0, %e + %g1 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f31, <16 x i8> %d) + %h1 = xor <16 x i8> %g1, %e + %g2 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f32, <16 x i8> %d) + %h2 = xor <16 x i8> %g2, %e + %g3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %f33, <16 x i8> %d) + %h3 = xor <16 x i8> %g3, %e + store <16 x i8> %h0, <16 x i8>* %c0 + %c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1 + store <16 x i8> %h1, <16 x i8>* %c1 + %c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2 + store <16 x i8> %h2, <16 x i8>* %c2 + %c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3 + store <16 x i8> %h3, <16 x i8>* %c3 + ret void + +; CHECK-LABEL: aesea: +; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]] +; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]] +; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]] +; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]] +; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]] +; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]] +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} +; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]] +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} +; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]] +} + +define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) { + %d0 = load <16 x i8>, <16 x i8>* %a0 + %a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1 + %d1 = load <16 x i8>, <16 x i8>* %a1 + %a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2 + %d2 = load <16 x i8>, <16 x i8>* %a2 + %a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3 + %d3 = load <16 x i8>, <16 x i8>* %a3 + %k0 = load <16 x i8>, <16 x i8>* %b0 + %e00 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %d0, <16 x i8> %k0) + %f00 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e00) + %e01 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %d1, <16 x i8> %k0) + %f01 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e01) + %e02 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %d2, <16 x i8> %k0) + %f02 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e02) + %e03 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %d3, <16 x i8> %k0) + %f03 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e03) + %b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1 + %k1 = load <16 x i8>, <16 x i8>* %b1 + %e10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f00, <16 x i8> %k1) + %f10 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e00) + %e11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f01, <16 x i8> %k1) + %f11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e01) + %e12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f02, <16 x i8> %k1) + %f12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e02) + %e13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f03, <16 x i8> %k1) + %f13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e03) + %b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2 + %k2 = load <16 x i8>, <16 x i8>* %b2 + %e20 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f10, <16 x i8> %k2) + %f20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e10) + %e21 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f11, <16 x i8> %k2) + %f21 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e11) + %e22 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f12, <16 x i8> %k2) + %f22 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e12) + %e23 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f13, <16 x i8> %k2) + %f23 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e13) + %b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3 + %k3 = load <16 x i8>, <16 x i8>* %b3 + %e30 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f20, <16 x i8> %k3) + %f30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e20) + %e31 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f21, <16 x i8> %k3) + %f31 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e21) + %e32 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f22, <16 x i8> %k3) + %f32 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e22) + %e33 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f23, <16 x i8> %k3) + %f33 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %e23) + %g0 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f30, <16 x i8> %d) + %h0 = xor <16 x i8> %g0, %e + %g1 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f31, <16 x i8> %d) + %h1 = xor <16 x i8> %g1, %e + %g2 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f32, <16 x i8> %d) + %h2 = xor <16 x i8> %g2, %e + %g3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %f33, <16 x i8> %d) + %h3 = xor <16 x i8> %g3, %e + store <16 x i8> %h0, <16 x i8>* %c0 + %c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1 + store <16 x i8> %h1, <16 x i8>* %c1 + %c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2 + store <16 x i8> %h2, <16 x i8>* %c2 + %c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3 + store <16 x i8> %h3, <16 x i8>* %c3 + ret void + +; CHECK-LABEL: aesda: +; CHECK: aesd.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]] +; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]] +; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]] +; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]] +; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]] +; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]] +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} +; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]] +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} +; CHECK: aesd.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QH]] +} + +define void @aes_load_store(<16 x i8> *%p1, <16 x i8> *%p2 , <16 x i8> *%p3) { +entry: + %x1 = alloca <16 x i8>, align 16 + %x2 = alloca <16 x i8>, align 16 + %x3 = alloca <16 x i8>, align 16 + %x4 = alloca <16 x i8>, align 16 + %x5 = alloca <16 x i8>, align 16 + %in1 = load <16 x i8>, <16 x i8>* %p1, align 16 + store <16 x i8> %in1, <16 x i8>* %x1, align 16 + %aese1 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %in1, <16 x i8> %in1) #2 + store <16 x i8> %aese1, <16 x i8>* %x2, align 16 + %in2 = load <16 x i8>, <16 x i8>* %p2, align 16 + %aesmc1= call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %aese1) #2 + store <16 x i8> %aesmc1, <16 x i8>* %x3, align 16 + %aese2 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %in1, <16 x i8> %in2) #2 + store <16 x i8> %aese2, <16 x i8>* %x4, align 16 + %aesmc2= call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %aese2) #2 + store <16 x i8> %aesmc2, <16 x i8>* %x5, align 16 + ret void + +; CHECK-LABEL: aes_load_store: +; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]] +; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} +; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]] +} diff --git a/test/CodeGen/ARM/v6m-umul-with-overflow.ll b/test/CodeGen/ARM/v6m-umul-with-overflow.ll new file mode 100644 index 0000000000000..4e3146d711024 --- /dev/null +++ b/test/CodeGen/ARM/v6m-umul-with-overflow.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s + +define i1 @unsigned_multiplication_did_overflow(i32, i32) { +; CHECK-LABEL: unsigned_multiplication_did_overflow: +entry-block: + %2 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %0, i32 %1) + %3 = extractvalue { i32, i1 } %2, 1 + ret i1 %3 + +; CHECK: mov{{s?}} r2, r1 +; CHECK: mov{{s?}} r1, #0 +; CHECK: mov{{s?}} r3, {{#0|r1}} +; CHECK: bl __aeabi_lmul +} + +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) diff --git a/test/CodeGen/ARM/vector-promotion.ll b/test/CodeGen/ARM/vector-promotion.ll index 1dabee386089d..9e2b35fe82584 100644 --- a/test/CodeGen/ARM/vector-promotion.ll +++ b/test/CodeGen/ARM/vector-promotion.ll @@ -53,8 +53,8 @@ define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* ; IR-BOTH: ret ; ; ASM-LABEL: unsupportedChainInDifferentBBs: -; ASM: vldrne [[LOAD:d[0-9]+]], [r0] -; ASM: vmovne.32 {{r[0-9]+}}, [[LOAD]] +; ASM: vldr [[LOAD:d[0-9]+]], [r0] +; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]] ; ASM: bx define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) { bb1: diff --git a/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll b/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll index 5e3c45c3454d8..5017de835b5da 100644 --- a/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll +++ b/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll @@ -24,14 +24,13 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" ; CHECK-NEXT: bx lr } ; CHECK: .p2align 4 -; CHECK-NEXT: .long {{.*}}Lxray_synthetic_0 ; CHECK-NEXT: .long {{.*}}Lxray_fn_idx_synth_0 ; CHECK-NEXT: .section {{.*}}xray_instr_map{{.*}} -; CHECK-LABEL: Lxray_synthetic_0: +; CHECK-LABEL: Lxray_sleds_start0: ; CHECK: .long {{.*}}Lxray_sled_0 ; CHECK: .long {{.*}}Lxray_sled_1 -; CHECK-LABEL: Lxray_synthetic_end0: +; CHECK-LABEL: Lxray_sleds_end0: ; CHECK: .section {{.*}}xray_fn_idx{{.*}} ; CHECK-LABEL: Lxray_fn_idx_synth_0: -; CHECK: .long {{.*}}Lxray_synthetic_0 -; CHECK-NEXT: .long {{.*}}Lxray_synthetic_end0 +; CHECK: .long {{.*}}Lxray_sleds_start0 +; CHECK-NEXT: .long {{.*}}Lxray_sleds_end0 diff --git a/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll b/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll index 739151fbdd5e5..118c02adeb8aa 100644 --- a/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll +++ b/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll @@ -24,15 +24,14 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" ; CHECK-NEXT: bx lr } ; CHECK: .p2align 4 -; CHECK-NEXT: .long {{.*}}Lxray_synthetic_0 ; CHECK-NEXT: .long {{.*}}Lxray_fn_idx_synth_0 ; CHECK-NEXT: .section {{.*}}xray_instr_map{{.*}} -; CHECK-LABEL: Lxray_synthetic_0: +; CHECK-LABEL: Lxray_sleds_start0: ; CHECK: .long {{.*}}Lxray_sled_0 ; CHECK: .long {{.*}}Lxray_sled_1 -; CHECK-LABEL: Lxray_synthetic_end0: +; CHECK-LABEL: Lxray_sleds_end0: ; CHECK: .section {{.*}}xray_fn_idx{{.*}} ; CHECK-LABEL: Lxray_fn_idx_synth_0: -; CHECK: .long {{.*}}xray_synthetic_0 -; CHECK-NEXT: .long {{.*}}xray_synthetic_end0 +; CHECK: .long {{.*}}xray_sleds_start0 +; CHECK-NEXT: .long {{.*}}xray_sleds_end0 |