summaryrefslogtreecommitdiff
path: root/test/CodeGen/ARM
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-04-16 16:01:22 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-04-16 16:01:22 +0000
commit71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree5343938942df402b49ec7300a1c25a2d4ccd5821 /test/CodeGen/ARM
parent31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
Notes
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r--test/CodeGen/ARM/2007-05-22-tailmerge-3.ll8
-rw-r--r--test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll1
-rw-r--r--test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll5
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir406
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll567
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll51
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-isel.ll144
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir282
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-legalizer.mir233
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir280
-rw-r--r--test/CodeGen/ARM/alloc-no-stack-realign.ll100
-rw-r--r--test/CodeGen/ARM/arg-copy-elide.ll61
-rw-r--r--test/CodeGen/ARM/arm-and-tst-peephole.ll58
-rw-r--r--test/CodeGen/ARM/arm-position-independence.ll144
-rw-r--r--test/CodeGen/ARM/atomic-cmpxchg.ll14
-rw-r--r--test/CodeGen/ARM/atomic-op.ll4
-rw-r--r--test/CodeGen/ARM/atomic-ops-v8.ll35
-rw-r--r--test/CodeGen/ARM/bfi.ll12
-rw-r--r--test/CodeGen/ARM/bic.ll13
-rw-r--r--test/CodeGen/ARM/bool-ext-inc.ll32
-rw-r--r--test/CodeGen/ARM/build-attributes.ll77
-rw-r--r--test/CodeGen/ARM/cmp1-peephole-thumb.mir78
-rw-r--r--test/CodeGen/ARM/cmp2-peephole-thumb.mir108
-rw-r--r--test/CodeGen/ARM/cmpxchg-weak.ll8
-rw-r--r--test/CodeGen/ARM/constantpool-promote.ll62
-rw-r--r--test/CodeGen/ARM/debug-info-s16-reg.ll2
-rw-r--r--test/CodeGen/ARM/debug-info-sreg2.ll2
-rw-r--r--test/CodeGen/ARM/div.ll9
-rw-r--r--test/CodeGen/ARM/fast-isel-align.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-cmp-imm.ll16
-rw-r--r--test/CodeGen/ARM/fold-stack-adjust.ll2
-rw-r--r--test/CodeGen/ARM/fp-only-sp.ll62
-rw-r--r--test/CodeGen/ARM/fp16-promote.ll32
-rw-r--r--test/CodeGen/ARM/fp16-v3.ll2
-rw-r--r--test/CodeGen/ARM/fpcmp-opt.ll8
-rw-r--r--test/CodeGen/ARM/fpcmp.ll4
-rw-r--r--test/CodeGen/ARM/fpcmp_ueq.ll2
-rw-r--r--test/CodeGen/ARM/fpscr-intrinsics.ll44
-rw-r--r--test/CodeGen/ARM/gpr-paired-spill.ll18
-rw-r--r--test/CodeGen/ARM/ifcvt10.ll2
-rw-r--r--test/CodeGen/ARM/illegal-bitfield-loadstore.ll184
-rw-r--r--test/CodeGen/ARM/indirectbr.ll1
-rw-r--r--test/CodeGen/ARM/interval-update-remat.ll4
-rw-r--r--test/CodeGen/ARM/intrinsics-coprocessor.ll1
-rw-r--r--test/CodeGen/ARM/ldm-stm-i256.ll38
-rw-r--r--test/CodeGen/ARM/ldrd.ll28
-rw-r--r--test/CodeGen/ARM/load-combine-big-endian.ll779
-rw-r--r--test/CodeGen/ARM/load-combine.ll692
-rw-r--r--test/CodeGen/ARM/longMAC.ll262
-rw-r--r--test/CodeGen/ARM/lowerMUL-newload.ll115
-rw-r--r--test/CodeGen/ARM/mature-mc-support.ll2
-rw-r--r--test/CodeGen/ARM/misched-fp-basic.ll69
-rw-r--r--test/CodeGen/ARM/misched-int-basic-thumb2.mir175
-rw-r--r--test/CodeGen/ARM/misched-int-basic.mir128
-rw-r--r--test/CodeGen/ARM/movt.ll8
-rw-r--r--test/CodeGen/ARM/msr-it-block.ll8
-rw-r--r--test/CodeGen/ARM/neon_vabs.ll95
-rw-r--r--test/CodeGen/ARM/no-cmov2bfi.ll19
-rw-r--r--test/CodeGen/ARM/phi.ll1
-rw-r--r--test/CodeGen/ARM/pr32545.ll22
-rw-r--r--test/CodeGen/ARM/prera-ldst-aliasing.mir40
-rw-r--r--test/CodeGen/ARM/prera-ldst-insertpt.mir105
-rw-r--r--test/CodeGen/ARM/rbit.ll3
-rw-r--r--test/CodeGen/ARM/rev.ll14
-rw-r--r--test/CodeGen/ARM/select_const.ll326
-rw-r--r--test/CodeGen/ARM/select_xform.ll12
-rw-r--r--test/CodeGen/ARM/setcc-logic.ll74
-rw-r--r--test/CodeGen/ARM/setcc-sentinals.ll14
-rw-r--r--test/CodeGen/ARM/single-issue-r52.mir86
-rw-r--r--test/CodeGen/ARM/sjljeh-swifterror.ll27
-rw-r--r--test/CodeGen/ARM/smml.ll43
-rw-r--r--test/CodeGen/ARM/smul.ll29
-rw-r--r--test/CodeGen/ARM/softfp-fabs-fneg.ll3
-rw-r--r--test/CodeGen/ARM/special-reg-mcore.ll82
-rw-r--r--test/CodeGen/ARM/special-reg-v8m-main.ll8
-rw-r--r--test/CodeGen/ARM/stack_guard_remat.ll8
-rw-r--r--test/CodeGen/ARM/static-addr-hoisting.ll6
-rw-r--r--test/CodeGen/ARM/tail-opts.ll52
-rw-r--r--test/CodeGen/ARM/thumb1-div.ll67
-rw-r--r--test/CodeGen/ARM/unschedule-first-call.ll136
-rw-r--r--test/CodeGen/ARM/v6-jumptable-clobber.mir384
-rw-r--r--test/CodeGen/ARM/v8m-tail-call.ll23
-rw-r--r--test/CodeGen/ARM/v8m.base-jumptable_alignment.ll51
-rw-r--r--test/CodeGen/ARM/va_arg.ll8
-rw-r--r--test/CodeGen/ARM/vcmp-crash.ll11
-rw-r--r--test/CodeGen/ARM/vldm-liveness.ll19
-rw-r--r--test/CodeGen/ARM/vldm-liveness.mir40
-rw-r--r--test/CodeGen/ARM/vsel.ll8
-rw-r--r--test/CodeGen/ARM/vuzp.ll22
89 files changed, 6908 insertions, 416 deletions
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 52cc37e240845..b8f2980be7502 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -12,11 +12,11 @@
; CHECK: bl _quux
; CHECK-NOT: bl _quux
-; NOMERGE: bl _baz
-; NOMERGE: bl _baz
+; NOMERGE-DAG: bl _baz
+; NOMERGE-DAG: bl _baz
-; NOMERGE: bl _quux
-; NOMERGE: bl _quux
+; NOMERGE-DAG: bl _quux
+; NOMERGE-DAG: bl _quux
; ModuleID = 'tail.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
index 5d59fc64d9222..e5c2fb4d67a1b 100644
--- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -1,5 +1,4 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
; PR4091
define void @foo(i32 %i, i32* %p) nounwind {
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index 4a1341c4d6e71..2a5af6199a345 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -12,13 +12,14 @@ define void @test_byval_8_bytes_alignment(i32 %i, ...) {
entry:
; CHECK: sub sp, sp, #12
; CHECK: sub sp, sp, #4
-; CHECK: stmib sp, {r1, r2, r3}
+; CHECK: add r0, sp, #4
+; CHECK: stm sp, {r0, r1, r2, r3}
%g = alloca i8*
%g1 = bitcast i8** %g to i8*
call void @llvm.va_start(i8* %g1)
; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
-; CHECK: bfc [[REG]], #0, #3
+; CHECK: bic [[REG]], [[REG]], #7
%0 = va_arg i8** %g, double
call void @llvm.va_end(i8* %g1)
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 5c0853cfaab45..66d9033a6d7cb 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -1,10 +1,135 @@
# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
+ define void @test_zext_s1() { ret void }
+ define void @test_sext_s1() { ret void }
+ define void @test_sext_s8() { ret void }
+ define void @test_zext_s16() { ret void }
+
define void @test_add_s8() { ret void }
define void @test_add_s16() { ret void }
define void @test_add_s32() { ret void }
+ define void @test_fadd_s32() #0 { ret void }
+ define void @test_fadd_s64() #0 { ret void }
+
define void @test_load_from_stack() { ret void }
+ define void @test_load_f32() #0 { ret void }
+ define void @test_load_f64() #0 { ret void }
+
+ define void @test_stores() #0 { ret void }
+
+ define void @test_gep() { ret void }
+ define void @test_constants() { ret void }
+
+ define void @test_soft_fp_double() #0 { ret void }
+
+ attributes #0 = { "target-features"="+vfp2,-neonfp" }
+...
+---
+name: test_zext_s1
+# CHECK-LABEL: name: test_zext_s1
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0
+
+ %0(s1) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = G_ZEXT %0(s1)
+ ; CHECK: [[VREGEXT:%[0-9]+]] = ANDri [[VREGX]], 1, 14, _, _
+
+ %r0 = COPY %1(s32)
+ ; CHECK: %r0 = COPY [[VREGEXT]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_sext_s1
+# CHECK-LABEL: name: test_sext_s1
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0
+
+ %0(s1) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = G_SEXT %0(s1)
+ ; CHECK: [[VREGAND:%[0-9]+]] = ANDri [[VREGX]], 1, 14, _, _
+ ; CHECK: [[VREGEXT:%[0-9]+]] = RSBri [[VREGAND]], 0, 14, _, _
+
+ %r0 = COPY %1(s32)
+ ; CHECK: %r0 = COPY [[VREGEXT]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_sext_s8
+# CHECK-LABEL: name: test_sext_s8
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0
+
+ %0(s8) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = G_SEXT %0(s8)
+ ; CHECK: [[VREGEXT:%[0-9]+]] = SXTB [[VREGX]], 0, 14, _
+
+ %r0 = COPY %1(s32)
+ ; CHECK: %r0 = COPY [[VREGEXT]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_zext_s16
+# CHECK-LABEL: name: test_zext_s16
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0
+
+ %0(s16) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = G_ZEXT %0(s16)
+ ; CHECK: [[VREGEXT:%[0-9]+]] = UXTH [[VREGX]], 0, 14, _
+
+ %r0 = COPY %1(s32)
+ ; CHECK: %r0 = COPY [[VREGEXT]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
...
---
name: test_add_s8
@@ -106,6 +231,72 @@ body: |
; CHECK: BX_RET 14, _, implicit %r0
...
---
+name: test_fadd_s32
+# CHECK-LABEL: name: test_fadd_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: fprb }
+ - { id: 1, class: fprb }
+ - { id: 2, class: fprb }
+# CHECK: id: 0, class: spr
+# CHECK: id: 1, class: spr
+# CHECK: id: 2, class: spr
+body: |
+ bb.0:
+ liveins: %s0, %s1
+
+ %0(s32) = COPY %s0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0
+
+ %1(s32) = COPY %s1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1
+
+ %2(s32) = G_FADD %0, %1
+ ; CHECK: [[VREGSUM:%[0-9]+]] = VADDS [[VREGX]], [[VREGY]], 14, _
+
+ %s0 = COPY %2(s32)
+ ; CHECK: %s0 = COPY [[VREGSUM]]
+
+ BX_RET 14, _, implicit %s0
+ ; CHECK: BX_RET 14, _, implicit %s0
+...
+---
+name: test_fadd_s64
+# CHECK-LABEL: name: test_fadd_s64
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: fprb }
+ - { id: 1, class: fprb }
+ - { id: 2, class: fprb }
+# CHECK: id: 0, class: dpr
+# CHECK: id: 1, class: dpr
+# CHECK: id: 2, class: dpr
+body: |
+ bb.0:
+ liveins: %d0, %d1
+
+ %0(s64) = COPY %d0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0
+
+ %1(s64) = COPY %d1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1
+
+ %2(s64) = G_FADD %0, %1
+ ; CHECK: [[VREGSUM:%[0-9]+]] = VADDD [[VREGX]], [[VREGY]], 14, _
+
+ %d0 = COPY %2(s64)
+ ; CHECK: %d0 = COPY [[VREGSUM]]
+
+ BX_RET 14, _, implicit %d0
+ ; CHECK: BX_RET 14, _, implicit %d0
+...
+---
name: test_load_from_stack
# CHECK-LABEL: name: test_load_from_stack
legalized: true
@@ -122,20 +313,225 @@ registers:
# CHECK-DAG: id: 2, class: gpr
# CHECK-DAG: id: 3, class: gpr
fixedStack:
- - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+ - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false }
- { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-# CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8
+# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0
+# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8
body: |
bb.0:
liveins: %r0, %r1, %r2, %r3
%0(p0) = G_FRAME_INDEX %fixed-stack.2
- ; CHECK: [[FIVREG:%[0-9]+]] = ADDri %fixed-stack.[[FRAME_INDEX]], 0, 14, _, _
+ ; CHECK: [[FI32VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI32]], 0, 14, _, _
+
+ %1(s32) = G_LOAD %0(p0) :: (load 4)
+ ; CHECK: [[LD32VREG:%[0-9]+]] = LDRi12 [[FI32VREG]], 0, 14, _
+
+ %r0 = COPY %1
+ ; CHECK: %r0 = COPY [[LD32VREG]]
+
+ %2(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[FI1VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI1]], 0, 14, _, _
- %1(s32) = G_LOAD %0(p0)
- ; CHECK: {{%[0-9]+}} = LDRi12 [[FIVREG]], 0, 14, _
+ %3(s1) = G_LOAD %2(p0) :: (load 1)
+ ; CHECK: [[LD1VREG:%[0-9]+]] = LDRBi12 [[FI1VREG]], 0, 14, _
+
+ %r0 = COPY %3
+ ; CHECK: %r0 = COPY [[LD1VREG]]
BX_RET 14, _
; CHECK: BX_RET 14, _
...
+---
+name: test_load_f32
+# CHECK-LABEL: name: test_load_f32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: fprb }
+# CHECK-DAG: id: [[P:[0-9]+]], class: gpr
+# CHECK-DAG: id: [[V:[0-9]+]], class: spr
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ %0(p0) = COPY %r0
+
+ %1(s32) = G_LOAD %0(p0) :: (load 4)
+ ; CHECK: %[[V]] = VLDRS %[[P]], 0, 14, _
+
+ %s0 = COPY %1
+ ; CHECK: %s0 = COPY %[[V]]
+
+ BX_RET 14, _, implicit %s0
+ ; CHECK: BX_RET 14, _, implicit %s0
+...
+---
+name: test_load_f64
+# CHECK-LABEL: name: test_load_f64
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: fprb }
+# CHECK-DAG: id: [[P:[0-9]+]], class: gpr
+# CHECK-DAG: id: [[V:[0-9]+]], class: dpr
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ %0(p0) = COPY %r0
+
+ %1(s64) = G_LOAD %0(p0) :: (load 8)
+ ; CHECK: %[[V]] = VLDRD %[[P]], 0, 14, _
+
+ %d0 = COPY %1
+ ; CHECK: %d0 = COPY %[[V]]
+
+ BX_RET 14, _, implicit %d0
+ ; CHECK: BX_RET 14, _, implicit %d0
+...
+---
+name: test_stores
+# CHECK-LABEL: name: test_stores
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+ - { id: 4, class: fprb }
+ - { id: 5, class: fprb }
+# CHECK: id: [[P:[0-9]+]], class: gpr
+# CHECK: id: [[I8:[0-9]+]], class: gpr
+# CHECK: id: [[I16:[0-9]+]], class: gpr
+# CHECK: id: [[I32:[0-9]+]], class: gpr
+# CHECK: id: [[F32:[0-9]+]], class: spr
+# CHECK: id: [[F64:[0-9]+]], class: dpr
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ %0(p0) = COPY %r0
+ %1(s8) = COPY %r3
+ %2(s16) = COPY %r2
+ %3(s32) = COPY %r1
+ %4(s32) = COPY %s0
+ %5(s64) = COPY %d2
+
+ G_STORE %1(s8), %0(p0) :: (store 1)
+ ; CHECK: STRBi12 %[[I8]], %[[P]], 0, 14, _
+
+ G_STORE %2(s16), %0(p0) :: (store 2)
+ ; CHECK: STRH %[[I16]], %[[P]], _, 0, 14, _
+
+ G_STORE %3(s32), %0(p0) :: (store 4)
+ ; CHECK: STRi12 %[[I32]], %[[P]], 0, 14, _
+
+ G_STORE %4(s32), %0(p0) :: (store 4)
+ ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14, _
+
+ G_STORE %5(s64), %0(p0) :: (store 8)
+ ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14, _
+
+ BX_RET 14, _
+...
+---
+name: test_gep
+# CHECK-LABEL: name: test_gep
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: [[PTR:[0-9]+]], class: gpr
+# CHECK: id: [[OFF:[0-9]+]], class: gpr
+# CHECK: id: [[GEP:[0-9]+]], class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(p0) = COPY %r0
+ %1(s32) = COPY %r1
+
+ %2(p0) = G_GEP %0, %1(s32)
+ ; CHECK: %[[GEP]] = ADDrr %[[PTR]], %[[OFF]], 14, _, _
+
+ %r0 = COPY %2(p0)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_constants
+# CHECK-LABEL: name: test_constants
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+# CHECK: id: [[C:[0-9]+]], class: gpr
+body: |
+ bb.0:
+ %0(s32) = G_CONSTANT 42
+ ; CHECK: %[[C]] = MOVi 42, 14, _, _
+
+ %r0 = COPY %0(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_soft_fp_double
+# CHECK-LABEL: name: test_soft_fp_double
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: fprb }
+ - { id: 3, class: gprb }
+ - { id: 4, class: gprb }
+# CHECK-DAG: id: {{[0-9]+}}, class: gpr
+# CHECK-DAG: id: {{[0-9]+}}, class: gpr
+# CHECK-DAG: id: {{[0-9]+}}, class: gpr
+# CHECK-DAG: id: {{[0-9]+}}, class: gpr
+# CHECK-DAG: id: [[DREG:[0-9]+]], class: dpr
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ %0(s32) = COPY %r2
+ ; CHECK: [[IN1:%[0-9]+]] = COPY %r2
+
+ %1(s32) = COPY %r3
+ ; CHECK: [[IN2:%[0-9]+]] = COPY %r3
+
+ %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 1
+ ; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]]
+
+ %3(s32) = G_EXTRACT %2(s64), 0
+ %4(s32) = G_EXTRACT %2(s64), 32
+ ; CHECK: [[OUT1:%[0-9]+]] = VGETLNi32 %[[DREG]], 0
+ ; CHECK: [[OUT2:%[0-9]+]] = VGETLNi32 %[[DREG]], 1
+
+ %r0 = COPY %3
+ ; CHECK: %r0 = COPY [[OUT1]]
+
+ %r1 = COPY %4
+ ; CHECK: %r1 = COPY [[OUT2]]
+
+ BX_RET 14, _, implicit %r0, implicit %r1
+ ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+...
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index f863ed5a6849b..a7f5ec33bee3c 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple arm-unknown -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
+; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
define void @test_void_return() {
; CHECK-LABEL: name: test_void_return
@@ -7,6 +8,20 @@ entry:
ret void
}
+define signext i1 @test_add_i1(i1 %x, i1 %y) {
+; CHECK-LABEL: name: test_add_i1
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[VREGX:%[0-9]+]](s1) = COPY %r0
+; CHECK-DAG: [[VREGY:%[0-9]+]](s1) = COPY %r1
+; CHECK: [[SUM:%[0-9]+]](s1) = G_ADD [[VREGX]], [[VREGY]]
+; CHECK: [[EXT:%[0-9]+]](s32) = G_SEXT [[SUM]]
+; CHECK: %r0 = COPY [[EXT]](s32)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %sum = add i1 %x, %y
+ ret i1 %sum
+}
+
define i8 @test_add_i8(i8 %x, i8 %y) {
; CHECK-LABEL: name: test_add_i8
; CHECK: liveins: %r0, %r1
@@ -20,6 +35,17 @@ entry:
ret i8 %sum
}
+define signext i8 @test_return_sext_i8(i8 %x) {
+; CHECK-LABEL: name: test_return_sext_i8
+; CHECK: liveins: %r0
+; CHECK: [[VREG:%[0-9]+]](s8) = COPY %r0
+; CHECK: [[VREGEXT:%[0-9]+]](s32) = G_SEXT [[VREG]]
+; CHECK: %r0 = COPY [[VREGEXT]](s32)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ ret i8 %x
+}
+
define i16 @test_add_i16(i16 %x, i16 %y) {
; CHECK-LABEL: name: test_add_i16
; CHECK: liveins: %r0, %r1
@@ -33,6 +59,17 @@ entry:
ret i16 %sum
}
+define zeroext i16 @test_return_zext_i16(i16 %x) {
+; CHECK-LABEL: name: test_return_zext_i16
+; CHECK: liveins: %r0
+; CHECK: [[VREG:%[0-9]+]](s16) = COPY %r0
+; CHECK: [[VREGEXT:%[0-9]+]](s32) = G_ZEXT [[VREG]]
+; CHECK: %r0 = COPY [[VREGEXT]](s32)
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ ret i16 %x
+}
+
define i32 @test_add_i32(i32 %x, i32 %y) {
; CHECK-LABEL: name: test_add_i32
; CHECK: liveins: %r0, %r1
@@ -46,8 +83,8 @@ entry:
ret i32 %sum
}
-define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
-; CHECK-LABEL: name: test_many_args
+define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
+; CHECK-LABEL: name: test_stack_args
; CHECK: fixedStack:
; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4
; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4
@@ -62,3 +99,527 @@ entry:
%sum = add i32 %p2, %p5
ret i32 %sum
}
+
+define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
+ i8 signext %p4, i16 signext %p5) {
+; CHECK-LABEL: name: test_stack_args_signext
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
+; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1
+; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]](p0)
+; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]]
+; CHECK: %r0 = COPY [[SUM]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %sum = add i16 %p1, %p5
+ ret i16 %sum
+}
+
+define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
+ i8 zeroext %p4, i16 zeroext %p5) {
+; CHECK-LABEL: name: test_stack_args_zeroext
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
+; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2
+; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]]
+; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = G_LOAD [[FIP4]](p0)
+; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]]
+; CHECK: %r0 = COPY [[SUM]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %sum = add i8 %p2, %p4
+ ret i8 %sum
+}
+
+define i16 @test_ptr_arg(i16* %p) {
+; CHECK-LABEL: name: test_ptr_arg
+; CHECK: liveins: %r0
+; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0
+; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD [[VREGP]](p0)
+entry:
+ %v = load i16, i16* %p
+ ret i16 %v
+}
+
+define i32* @test_ptr_ret(i32** %p) {
+; Test pointer returns and pointer-to-pointer arguments
+; CHECK-LABEL: name: test_ptr_ret
+; CHECK: liveins: %r0
+; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0
+; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0)
+; CHECK: %r0 = COPY [[VREGV]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %v = load i32*, i32** %p
+ ret i32* %v
+}
+
+define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) {
+; CHECK-LABEL: name: test_ptr_arg_on_stack
+; CHECK: fixedStack:
+; CHECK: id: [[P:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[FIP:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P]]
+; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0)
+; CHECK: %r0 = COPY [[VREGV]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2,
+ float %p3, float %p4, float %p5) {
+; CHECK-LABEL: name: test_float_aapcscc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4
+; CHECK-DAG: id: [[P5:[0-9]+]]{{.*}}offset: 4{{.*}}size: 4
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGP5]]
+; CHECK: %r0 = COPY [[VREGV]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %v = fadd float %p1, %p5
+ ret float %v
+}
+
+define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float %p1, float %p2,
+ float %p3, float %p4, float %p5,
+ float %ridiculous,
+ float %number,
+ float %of,
+ float %parameters,
+ float %that,
+ float %should,
+ float %never,
+ float %exist,
+ float %in,
+ float %practice,
+ float %q0, float %q1) {
+; CHECK-LABEL: name: test_float_vfpcc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4
+; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 4{{.*}}size: 4
+; CHECK: liveins: %s0, %s1, %s2, %s3, %s4, %s5, %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15
+; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %s1
+; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]]
+; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGQ1]]
+; CHECK: %s0 = COPY [[VREGV]]
+; CHECK: BX_RET 14, _, implicit %s0
+entry:
+ %v = fadd float %p1, %q1
+ ret float %v
+}
+
+define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double %p2,
+ double %p3, double %p4, double %p5,
+ double %reasonable,
+ double %parameters,
+ double %q0, double %q1) {
+; CHECK-LABEL: name: test_double_vfpcc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8
+; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8
+; CHECK: liveins: %d0, %d1, %d2, %d3, %d4, %d5, %d6, %d7
+; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d1
+; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]]
+; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]]
+; CHECK: %d0 = COPY [[VREGV]]
+; CHECK: BX_RET 14, _, implicit %d0
+entry:
+ %v = fadd double %p1, %q1
+ ret double %v
+}
+
+define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p2,
+ double %p3, double %p4, double %p5) {
+; CHECK-LABEL: name: test_double_aapcscc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P2:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8
+; CHECK-DAG: id: [[P3:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8
+; CHECK-DAG: id: [[P4:[0-9]+]]{{.*}}offset: 16{{.*}}size: 8
+; CHECK-DAG: id: [[P5:[0-9]+]]{{.*}}offset: 24{{.*}}size: 8
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2
+; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3
+; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32
+; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32
+; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]]
+; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
+; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
+; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; CHECK-DAG: %r0 = COPY [[VREGVLO]]
+; CHECK-DAG: %r1 = COPY [[VREGVHI]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+entry:
+ %v = fadd double %p1, %p5
+ ret double %v
+}
+
+define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler,
+ double %p1, double %p2,
+ double %p3, double %p4,
+ double %reasonable,
+ double %parameters,
+ double %q0, double %q1) {
+; CHECK-LABEL: name: test_double_gap_vfpcc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8
+; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8
+; CHECK: liveins: %d0, %d2, %d3, %d4, %d5, %d6, %d7, %s2
+; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d2
+; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]]
+; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]]
+; CHECK: %d0 = COPY [[VREGV]]
+; CHECK: BX_RET 14, _, implicit %d0
+entry:
+ %v = fadd double %p1, %q1
+ ret double %v
+}
+
+define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0,
+ double %p1) {
+; CHECK-LABEL: name: test_double_gap_aapcscc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P1:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8
+; CHECK: liveins: %r0, %r2, %r3
+; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2
+; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3
+; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
+; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
+; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
+; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
+; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
+; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
+; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; CHECK-DAG: %r0 = COPY [[VREGVLO]]
+; CHECK-DAG: %r1 = COPY [[VREGVHI]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+entry:
+ %v = fadd double %p0, %p1
+ ret double %v
+}
+
+define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler,
+ double %p1) {
+; CHECK-LABEL: name: test_double_gap2_aapcscc
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P1:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8
+; CHECK: liveins: %r0, %r1, %r2
+; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1
+; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
+; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
+; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
+; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
+; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
+; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
+; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; CHECK-DAG: %r0 = COPY [[VREGVLO]]
+; CHECK-DAG: %r1 = COPY [[VREGVHI]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+entry:
+ %v = fadd double %p0, %p1
+ ret double %v
+}
+
+define arm_aapcscc void @test_indirect_call(void() *%fptr) {
+; CHECK-LABEL: name: test_indirect_call
+; CHECK: [[FPTR:%[0-9]+]](p0) = COPY %r0
+; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: BLX [[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+entry:
+ notail call arm_aapcscc void %fptr()
+ ret void
+}
+
+declare arm_aapcscc void @call_target()
+
+define arm_aapcscc void @test_direct_call() {
+; CHECK-LABEL: name: test_direct_call
+; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: BLX @call_target, csr_aapcs, implicit-def %lr, implicit %sp
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+entry:
+ notail call arm_aapcscc void @call_target()
+ ret void
+}
+
+declare arm_aapcscc i32* @simple_reg_params_target(i32, i32*)
+
+define arm_aapcscc i32* @test_call_simple_reg_params(i32 *%a, i32 %b) {
+; CHECK-LABEL: name: test_call_simple_reg_params
+; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0
+; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1
+; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK-DAG: %r0 = COPY [[BVREG]]
+; CHECK-DAG: %r1 = COPY [[AVREG]]
+; CHECK: BLX @simple_reg_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0
+; CHECK: [[RVREG:%[0-9]+]](p0) = COPY %r0
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: %r0 = COPY [[RVREG]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %r = notail call arm_aapcscc i32 *@simple_reg_params_target(i32 %b, i32 *%a)
+ ret i32 *%r
+}
+
+declare arm_aapcscc i32* @simple_stack_params_target(i32, i32*, i32, i32*, i32, i32*)
+
+define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) {
+; CHECK-LABEL: name: test_call_simple_stack_params
+; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0
+; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1
+; CHECK: ADJCALLSTACKDOWN 8, 14, _, implicit-def %sp, implicit %sp
+; CHECK-DAG: %r0 = COPY [[BVREG]]
+; CHECK-DAG: %r1 = COPY [[AVREG]]
+; CHECK-DAG: %r2 = COPY [[BVREG]]
+; CHECK-DAG: %r3 = COPY [[AVREG]]
+; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32)
+; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4
+; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 4
+; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32)
+; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4
+; CHECK: BLX @simple_stack_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0
+; CHECK: [[RVREG:%[0-9]+]](p0) = COPY %r0
+; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: %r0 = COPY [[RVREG]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %r = notail call arm_aapcscc i32 *@simple_stack_params_target(i32 %b, i32 *%a, i32 %b, i32 *%a, i32 %b, i32 *%a)
+ ret i32 *%r
+}
+
+declare arm_aapcscc signext i16 @ext_target(i8 signext, i8 zeroext, i16 signext, i16 zeroext, i8 signext, i8 zeroext, i16 signext, i16 zeroext, i1 zeroext)
+
+define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) {
+; CHECK-LABEL: name: test_call_ext_params
+; CHECK-DAG: [[AVREG:%[0-9]+]](s8) = COPY %r0
+; CHECK-DAG: [[BVREG:%[0-9]+]](s16) = COPY %r1
+; CHECK-DAG: [[CVREG:%[0-9]+]](s1) = COPY %r2
+; CHECK: ADJCALLSTACKDOWN 20, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[SEXTA:%[0-9]+]](s32) = G_SEXT [[AVREG]](s8)
+; CHECK: %r0 = COPY [[SEXTA]]
+; CHECK: [[ZEXTA:%[0-9]+]](s32) = G_ZEXT [[AVREG]](s8)
+; CHECK: %r1 = COPY [[ZEXTA]]
+; CHECK: [[SEXTB:%[0-9]+]](s32) = G_SEXT [[BVREG]](s16)
+; CHECK: %r2 = COPY [[SEXTB]]
+; CHECK: [[ZEXTB:%[0-9]+]](s32) = G_ZEXT [[BVREG]](s16)
+; CHECK: %r3 = COPY [[ZEXTB]]
+; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32)
+; CHECK: [[SEXTA2:%[0-9]+]](s32) = G_SEXT [[AVREG]]
+; CHECK: G_STORE [[SEXTA2]](s32), [[FI1]](p0){{.*}}store 4
+; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 4
+; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32)
+; CHECK: [[ZEXTA2:%[0-9]+]](s32) = G_ZEXT [[AVREG]]
+; CHECK: G_STORE [[ZEXTA2]](s32), [[FI2]](p0){{.*}}store 4
+; CHECK: [[SP3:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF3:%[0-9]+]](s32) = G_CONSTANT i32 8
+; CHECK: [[FI3:%[0-9]+]](p0) = G_GEP [[SP3]], [[OFF3]](s32)
+; CHECK: [[SEXTB2:%[0-9]+]](s32) = G_SEXT [[BVREG]]
+; CHECK: G_STORE [[SEXTB2]](s32), [[FI3]](p0){{.*}}store 4
+; CHECK: [[SP4:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF4:%[0-9]+]](s32) = G_CONSTANT i32 12
+; CHECK: [[FI4:%[0-9]+]](p0) = G_GEP [[SP4]], [[OFF4]](s32)
+; CHECK: [[ZEXTB2:%[0-9]+]](s32) = G_ZEXT [[BVREG]]
+; CHECK: G_STORE [[ZEXTB2]](s32), [[FI4]](p0){{.*}}store 4
+; CHECK: [[SP5:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF5:%[0-9]+]](s32) = G_CONSTANT i32 16
+; CHECK: [[FI5:%[0-9]+]](p0) = G_GEP [[SP5]], [[OFF5]](s32)
+; CHECK: [[ZEXTC:%[0-9]+]](s32) = G_ZEXT [[CVREG]]
+; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store 4
+; CHECK: BLX @ext_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0
+; CHECK: [[RVREG:%[0-9]+]](s16) = COPY %r0
+; CHECK: ADJCALLSTACKUP 20, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[RExtVREG:%[0-9]+]](s32) = G_SEXT [[RVREG]]
+; CHECK: %r0 = COPY [[RExtVREG]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+ %r = notail call arm_aapcscc signext i16 @ext_target(i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i1 zeroext %c)
+ ret i16 %r
+}
+
+declare arm_aapcs_vfpcc double @vfpcc_fp_target(float, double)
+
+define arm_aapcs_vfpcc double @test_call_vfpcc_fp_params(double %a, float %b) {
+; CHECK-LABEL: name: test_call_vfpcc_fp_params
+; CHECK-DAG: [[AVREG:%[0-9]+]](s64) = COPY %d0
+; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %s2
+; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK-DAG: %s0 = COPY [[BVREG]]
+; CHECK-DAG: %d1 = COPY [[AVREG]]
+; CHECK: BLX @vfpcc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %d1, implicit-def %d0
+; CHECK: [[RVREG:%[0-9]+]](s64) = COPY %d0
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: %d0 = COPY [[RVREG]]
+; CHECK: BX_RET 14, _, implicit %d0
+entry:
+ %r = notail call arm_aapcs_vfpcc double @vfpcc_fp_target(float %b, double %a)
+ ret double %r
+}
+
+declare arm_aapcscc double @aapcscc_fp_target(float, double, float, double)
+
+define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
+; CHECK-LABEL: name: test_call_aapcs_fp_params
+; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1
+; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A1]](s32), 0, [[A2]](s32), 32
+; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A2]](s32), 0, [[A1]](s32), 32
+; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2
+; CHECK: ADJCALLSTACKDOWN 16, 14, _, implicit-def %sp, implicit %sp
+; CHECK-DAG: %r0 = COPY [[BVREG]]
+; CHECK-DAG: [[A1:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 0
+; CHECK-DAG: [[A2:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 32
+; LITTLE-DAG: %r2 = COPY [[A1]]
+; LITTLE-DAG: %r3 = COPY [[A2]]
+; BIG-DAG: %r2 = COPY [[A2]]
+; BIG-DAG: %r3 = COPY [[A1]]
+; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32)
+; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4
+; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 8
+; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32)
+; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store 8
+; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1
+; LITTLE: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R1]](s32), 0, [[R2]](s32), 32
+; BIG: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R2]](s32), 0, [[R1]](s32), 32
+; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 0
+; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 32
+; LITTLE-DAG: %r0 = COPY [[R1]]
+; LITTLE-DAG: %r1 = COPY [[R2]]
+; BIG-DAG: %r0 = COPY [[R2]]
+; BIG-DAG: %r1 = COPY [[R1]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+entry:
+ %r = notail call arm_aapcscc double @aapcscc_fp_target(float %b, double %a, float %b, double %a)
+ ret double %r
+}
+
+declare arm_aapcscc float @different_call_conv_target(float)
+
+define arm_aapcs_vfpcc float @test_call_different_call_conv(float %x) {
+; CHECK-LABEL: name: test_call_different_call_conv
+; CHECK: [[X:%[0-9]+]](s32) = COPY %s0
+; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: %r0 = COPY [[X]]
+; CHECK: BLX @different_call_conv_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit-def %r0
+; CHECK: [[R:%[0-9]+]](s32) = COPY %r0
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: %s0 = COPY [[R]]
+; CHECK: BX_RET 14, _, implicit %s0
+entry:
+ %r = notail call arm_aapcscc float @different_call_conv_target(float %x)
+ ret float %r
+}
+
+define i32 @test_shufflevector_s32_v2s32(i32 %arg) {
+; CHECK-LABEL: name: test_shufflevector_s32_v2s32
+; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = IMPLICIT_DEF
+; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32)
+; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>)
+; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>)
+ %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
+ %shuffle = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer
+ %res = extractelement <2 x i32> %shuffle, i32 0
+ ret i32 %res
+}
+
+define i32 @test_shufflevector_v2s32_v3s32(i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
+; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32)
+; CHECK-DAG: [[V1:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32)
+; CHECK-DAG: [[V2:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32)
+; CHECK: [[VEC:%[0-9]+]](<3 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<3 x s32>)
+; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>)
+ %v1 = insertelement <2 x i32> undef, i32 %arg1, i32 0
+ %v2 = insertelement <2 x i32> %v1, i32 %arg2, i32 1
+ %shuffle = shufflevector <2 x i32> %v2, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
+ %res = extractelement <3 x i32> %shuffle, i32 0
+ ret i32 %res
+}
+
+
+define i32 @test_shufflevector_v2s32_v4s32(i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
+; CHECK-DAG: [[MASK:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32), [[C0]](s32), [[C0]](s32)
+; CHECK-DAG: [[V1:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32)
+; CHECK-DAG: [[V2:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32)
+; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<4 x s32>)
+; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<4 x s32>)
+ %v1 = insertelement <2 x i32> undef, i32 %arg1, i32 0
+ %v2 = insertelement <2 x i32> %v1, i32 %arg2, i32 1
+ %shuffle = shufflevector <2 x i32> %v2, <2 x i32> undef, <4 x i32> zeroinitializer
+ %res = extractelement <4 x i32> %shuffle, i32 0
+ ret i32 %res
+}
+
+define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
+; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32
+; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[ARG3:%[0-9]+]](s32) = COPY %r2
+; CHECK: [[ARG4:%[0-9]+]](s32) = COPY %r3
+; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = IMPLICIT_DEF
+; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1
+; CHECK-DAG: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2
+; CHECK-DAG: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3
+; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32)
+; CHECK-DAG: [[V1:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32)
+; CHECK-DAG: [[V2:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32)
+; CHECK-DAG: [[V3:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V2]], [[ARG3]](s32), [[C2]](s32)
+; CHECK-DAG: [[V4:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V3]], [[ARG4]](s32), [[C3]](s32)
+; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[V4]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x s32>)
+; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>)
+ %v1 = insertelement <4 x i32> undef, i32 %arg1, i32 0
+ %v2 = insertelement <4 x i32> %v1, i32 %arg2, i32 1
+ %v3 = insertelement <4 x i32> %v2, i32 %arg3, i32 2
+ %v4 = insertelement <4 x i32> %v3, i32 %arg4, i32 3
+ %shuffle = shufflevector <4 x i32> %v4, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+ %res = extractelement <2 x i32> %shuffle, i32 0
+ ret i32 %res
+}
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll b/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll
new file mode 100644
index 0000000000000..7d021fdb43dd9
--- /dev/null
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple arm-linux-gnueabihf -mattr=+vfp2 -float-abi=hard -global-isel %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD
+; RUN: llc -mtriple arm-linux-gnueabi -mattr=+vfp2,+soft-float -float-abi=soft -global-isel %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT-AEABI
+; RUN: llc -mtriple arm-linux-gnu- -mattr=+vfp2,+soft-float -float-abi=soft -global-isel %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT-DEFAULT
+
+define arm_aapcscc float @test_frem_float(float %x, float %y) {
+; CHECK-LABEL: test_frem_float:
+; CHECK: blx fmodf
+ %r = frem float %x, %y
+ ret float %r
+}
+
+define arm_aapcscc double @test_frem_double(double %x, double %y) {
+; CHECK-LABEL: test_frem_double:
+; CHECK: blx fmod
+ %r = frem double %x, %y
+ ret double %r
+}
+
+declare float @llvm.pow.f32(float %x, float %y)
+define arm_aapcscc float @test_fpow_float(float %x, float %y) {
+; CHECK-LABEL: test_fpow_float:
+; CHECK: blx powf
+ %r = call float @llvm.pow.f32(float %x, float %y)
+ ret float %r
+}
+
+declare double @llvm.pow.f64(double %x, double %y)
+define arm_aapcscc double @test_fpow_double(double %x, double %y) {
+; CHECK-LABEL: test_fpow_double:
+; CHECK: blx pow
+ %r = call double @llvm.pow.f64(double %x, double %y)
+ ret double %r
+}
+
+define arm_aapcscc float @test_add_float(float %x, float %y) {
+; CHECK-LABEL: test_add_float:
+; HARD: vadd.f32
+; SOFT-AEABI: blx __aeabi_fadd
+; SOFT-DEFAULT: blx __addsf3
+ %r = fadd float %x, %y
+ ret float %r
+}
+
+define arm_aapcscc double @test_add_double(double %x, double %y) {
+; CHECK-LABEL: test_add_double:
+; HARD: vadd.f64
+; SOFT-AEABI: blx __aeabi_dadd
+; SOFT-DEFAULT: blx __adddf3
+ %r = fadd double %x, %y
+ ret double %r
+}
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index 3f01b6dd3a830..236dcbeb84c52 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple arm-unknown -global-isel %s -o - | FileCheck %s
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel %s -o - | FileCheck %s
define void @test_void_return() {
; CHECK-LABEL: test_void_return:
@@ -7,6 +7,39 @@ entry:
ret void
}
+define zeroext i1 @test_zext_i1(i1 %x) {
+; CHECK-LABEL: test_zext_i1
+; CHECK: and r0, r0, #1
+; CHECK: bx lr
+entry:
+ ret i1 %x
+}
+
+define signext i1 @test_sext_i1(i1 %x) {
+; CHECK-LABEL: test_sext_i1
+; CHECK: and r0, r0, #1
+; CHECK: rsb r0, r0, #0
+; CHECK: bx lr
+entry:
+ ret i1 %x
+}
+
+define zeroext i8 @test_ext_i8(i8 %x) {
+; CHECK-LABEL: test_ext_i8:
+; CHECK: uxtb r0, r0
+; CHECK: bx lr
+entry:
+ ret i8 %x
+}
+
+define signext i16 @test_ext_i16(i16 %x) {
+; CHECK-LABEL: test_ext_i16:
+; CHECK: sxth r0, r0
+; CHECK: bx lr
+entry:
+ ret i16 %x
+}
+
define i8 @test_add_i8(i8 %x, i8 %y) {
; CHECK-LABEL: test_add_i8:
; CHECK: add r0, r0, r1
@@ -34,8 +67,8 @@ entry:
ret i32 %sum
}
-define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
-; CHECK-LABEL: test_many_args:
+define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
+; CHECK-LABEL: test_stack_args_i32:
; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
; CHECK: ldr [[P5:r[0-9]+]], {{.*}}[[P5ADDR]]
; CHECK: add r0, r2, [[P5]]
@@ -44,3 +77,108 @@ entry:
%sum = add i32 %p2, %p5
ret i32 %sum
}
+
+define i16 @test_stack_args_mixed(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 %p4, i16 %p5) {
+; CHECK-LABEL: test_stack_args_mixed:
+; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
+; CHECK: ldrh [[P5:r[0-9]+]], {{.*}}[[P5ADDR]]
+; CHECK: add r0, r1, [[P5]]
+; CHECK: bx lr
+entry:
+ %sum = add i16 %p1, %p5
+ ret i16 %sum
+}
+
+define i16 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i16 zeroext %p4) {
+; CHECK-LABEL: test_stack_args_zeroext:
+; CHECK: mov [[P4ADDR:r[0-9]+]], sp
+; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
+; CHECK: add r0, r1, [[P4]]
+; CHECK: bx lr
+entry:
+ %sum = add i16 %p1, %p4
+ ret i16 %sum
+}
+
+define i8 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 signext %p4) {
+; CHECK-LABEL: test_stack_args_signext:
+; CHECK: mov [[P4ADDR:r[0-9]+]], sp
+; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
+; CHECK: add r0, r2, [[P4]]
+; CHECK: bx lr
+entry:
+ %sum = add i8 %p2, %p4
+ ret i8 %sum
+}
+
+define i32 @test_ptr_arg_in_reg(i32* %p) {
+; CHECK-LABEL: test_ptr_arg_in_reg:
+; CHECK: ldr r0, [r0]
+; CHECK: bx lr
+entry:
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+define i32 @test_ptr_arg_on_stack(i32 %f0, i32 %f1, i32 %f2, i32 %f3, i32* %p) {
+; CHECK-LABEL: test_ptr_arg_on_stack:
+; CHECK: mov r0, sp
+; CHECK: ldr r0, [r0]
+; CHECK: ldr r0, [r0]
+; CHECK: bx lr
+entry:
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+define i8* @test_ptr_ret(i8** %p) {
+; CHECK-LABEL: test_ptr_ret:
+; CHECK: ldr r0, [r0]
+; CHECK: bx lr
+entry:
+ %v = load i8*, i8** %p
+ ret i8* %v
+}
+
+define arm_aapcs_vfpcc float @test_float_hard(float %f0, float %f1) {
+; CHECK-LABEL: test_float_hard:
+; CHECK: vadd.f32 s0, s0, s1
+; CHECK: bx lr
+entry:
+ %v = fadd float %f0, %f1
+ ret float %v
+}
+
+define arm_aapcscc float @test_float_softfp(float %f0, float %f1) {
+; CHECK-LABEL: test_float_softfp:
+; CHECK-DAG: vmov [[F0:s[0-9]+]], r0
+; CHECK-DAG: vmov [[F1:s[0-9]+]], r1
+; CHECK: vadd.f32 [[FV:s[0-9]+]], [[F0]], [[F1]]
+; CHECK: vmov r0, [[FV]]
+; CHECK: bx lr
+entry:
+ %v = fadd float %f0, %f1
+ ret float %v
+}
+
+define arm_aapcs_vfpcc double @test_double_hard(double %f0, double %f1) {
+; CHECK-LABEL: test_double_hard:
+; CHECK: vadd.f64 d0, d0, d1
+; CHECK: bx lr
+entry:
+ %v = fadd double %f0, %f1
+ ret double %v
+}
+
+define arm_aapcscc double @test_double_softfp(double %f0, double %f1) {
+; CHECK-LABEL: test_double_softfp:
+; CHECK-DAG: vmov [[F0:d[0-9]+]], r0, r1
+; CHECK-DAG: vmov [[F1:d[0-9]+]], r2, r3
+; CHECK: vadd.f64 [[FV:d[0-9]+]], [[F0]], [[F1]]
+; CHECK: vmov.32 r0, [[FV]][0]
+; CHECK: vmov.32 r1, [[FV]][1]
+; CHECK: bx lr
+entry:
+ %v = fadd double %f0, %f1
+ ret double %v
+}
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
new file mode 100644
index 0000000000000..d154b4887c195
--- /dev/null
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
@@ -0,0 +1,282 @@
+# RUN: llc -mtriple arm-linux-gnueabihf -mattr=+vfp2 -float-abi=hard -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD
+# RUN: llc -mtriple arm-linux-gnueabi -mattr=+vfp2,+soft-float -float-abi=soft -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix SOFT-AEABI
+# RUN: llc -mtriple arm-linux-gnu -mattr=+soft-float -float-abi=soft -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix SOFT-DEFAULT
+--- |
+ define void @test_frem_float() { ret void }
+ define void @test_frem_double() { ret void }
+
+ define void @test_fpow_float() { ret void }
+ define void @test_fpow_double() { ret void }
+
+ define void @test_fadd_float() { ret void }
+ define void @test_fadd_double() { ret void }
+...
+---
+name: test_frem_float
+# CHECK-LABEL: name: test_frem_float
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0
+ ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ ; CHECK: ADJCALLSTACKDOWN
+ ; SOFT-DAG: %r0 = COPY [[X]]
+ ; SOFT-DAG: %r1 = COPY [[Y]]
+ ; HARD-DAG: %s0 = COPY [[X]]
+ ; HARD-DAG: %s1 = COPY [[Y]]
+ ; SOFT: BLX $fmodf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0
+ ; HARD: BLX $fmodf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0
+ ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0
+ ; HARD: [[R:%[0-9]+]](s32) = COPY %s0
+ ; CHECK: ADJCALLSTACKUP
+ %2(s32) = G_FREM %0, %1
+ ; CHECK: %r0 = COPY [[R]]
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_frem_double
+# CHECK-LABEL: name: test_frem_double
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+ - { id: 7, class: _ }
+ - { id: 8, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ ; The inputs may be in the wrong order (depending on the target's
+ ; endianness), but that's orthogonal to what we're trying to test here.
+ ; For soft float, we only need to check that the first value, received
+ ; through R0-R1, ends up in R0-R1 or R1-R0, and the second value, received
+ ; through R2-R3, ends up in R2-R3 or R3-R2, when passed to fmod.
+ ; For hard float, the values need to end up in D0 and D1.
+ ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0
+ ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1
+ ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2
+ ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ %3(s32) = COPY %r3
+ ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
+ ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
+ %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
+ %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+ ; CHECK: ADJCALLSTACKDOWN
+ ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
+ ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]]
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]]
+ ; HARD-DAG: %d0 = COPY [[X]]
+ ; HARD-DAG: %d1 = COPY [[Y]]
+ ; SOFT: BLX $fmod, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+ ; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0
+ ; CHECK: ADJCALLSTACKUP
+ %6(s64) = G_FREM %4, %5
+ %7(s32) = G_EXTRACT %6(s64), 0
+ %8(s32) = G_EXTRACT %6(s64), 32
+ %r0 = COPY %7(s32)
+ %r1 = COPY %8(s32)
+ BX_RET 14, _, implicit %r0, implicit %r1
+...
+---
+name: test_fpow_float
+# CHECK-LABEL: name: test_fpow_float
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0
+ ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ ; CHECK: ADJCALLSTACKDOWN
+ ; SOFT-DAG: %r0 = COPY [[X]]
+ ; SOFT-DAG: %r1 = COPY [[Y]]
+ ; HARD-DAG: %s0 = COPY [[X]]
+ ; HARD-DAG: %s1 = COPY [[Y]]
+ ; SOFT: BLX $powf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0
+ ; HARD: BLX $powf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0
+ ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0
+ ; HARD: [[R:%[0-9]+]](s32) = COPY %s0
+ ; CHECK: ADJCALLSTACKUP
+ %2(s32) = G_FPOW %0, %1
+ ; CHECK: %r0 = COPY [[R]]
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_fpow_double
+# CHECK-LABEL: name: test_fpow_double
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+ - { id: 7, class: _ }
+ - { id: 8, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ ; The inputs may be in the wrong order (depending on the target's
+ ; endianness), but that's orthogonal to what we're trying to test here.
+ ; For soft float, we only need to check that the first value, received
+ ; through R0-R1, ends up in R0-R1 or R1-R0, and the second value, received
+ ; through R2-R3, ends up in R2-R3 or R3-R2, when passed to pow.
+ ; For hard float, the values need to end up in D0 and D1.
+ ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0
+ ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1
+ ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2
+ ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ %3(s32) = COPY %r3
+ ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
+ ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
+ %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
+ %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+ ; CHECK: ADJCALLSTACKDOWN
+ ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
+ ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]]
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]]
+ ; HARD-DAG: %d0 = COPY [[X]]
+ ; HARD-DAG: %d1 = COPY [[Y]]
+ ; SOFT: BLX $pow, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+ ; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0
+ ; CHECK: ADJCALLSTACKUP
+ %6(s64) = G_FPOW %4, %5
+ %7(s32) = G_EXTRACT %6(s64), 0
+ %8(s32) = G_EXTRACT %6(s64), 32
+ %r0 = COPY %7(s32)
+ %r1 = COPY %8(s32)
+ BX_RET 14, _, implicit %r0, implicit %r1
+...
+---
+name: test_fadd_float
+# CHECK-LABEL: name: test_fadd_float
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0
+ ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ ; HARD: [[R:%[0-9]+]](s32) = G_FADD [[X]], [[Y]]
+ ; SOFT: ADJCALLSTACKDOWN
+ ; SOFT-DAG: %r0 = COPY [[X]]
+ ; SOFT-DAG: %r1 = COPY [[Y]]
+ ; SOFT-AEABI: BLX $__aeabi_fadd, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0
+ ; SOFT-DEFAULT: BLX $__addsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0
+ ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0
+ ; SOFT: ADJCALLSTACKUP
+ %2(s32) = G_FADD %0, %1
+ ; CHECK: %r0 = COPY [[R]]
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_fadd_double
+# CHECK-LABEL: name: test_fadd_double
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+ - { id: 7, class: _ }
+ - { id: 8, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0
+ ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1
+ ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2
+ ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ %3(s32) = COPY %r3
+ ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
+ ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
+ %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
+ %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+ ; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]]
+ ; SOFT: ADJCALLSTACKDOWN
+ ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
+ ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]]
+ ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]]
+ ; SOFT-AEABI: BLX $__aeabi_dadd, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+ ; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+ ; SOFT: ADJCALLSTACKUP
+ %6(s64) = G_FADD %4, %5
+ ; HARD-DAG: G_EXTRACT [[R]](s64), 0
+ ; HARD-DAG: G_EXTRACT [[R]](s64), 32
+ %7(s32) = G_EXTRACT %6(s64), 0
+ %8(s32) = G_EXTRACT %6(s64), 32
+ %r0 = COPY %7(s32)
+ %r1 = COPY %8(s32)
+ BX_RET 14, _, implicit %r0, implicit %r1
+...
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
index 98d71c09e63ba..cbff7e12fb77c 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
@@ -1,10 +1,68 @@
# RUN: llc -mtriple arm-- -global-isel -run-pass=legalizer %s -o - | FileCheck %s
--- |
+ define void @test_sext_s8() { ret void }
+ define void @test_zext_s16() { ret void }
+
define void @test_add_s8() { ret void }
define void @test_add_s16() { ret void }
define void @test_add_s32() { ret void }
define void @test_load_from_stack() { ret void }
+ define void @test_legal_loads() #0 { ret void }
+ define void @test_legal_stores() #0 { ret void }
+
+ define void @test_gep() { ret void }
+
+ define void @test_constants() { ret void }
+
+ define void @test_fadd_s32() #0 { ret void }
+ define void @test_fadd_s64() #0 { ret void }
+
+ attributes #0 = { "target-features"="+vfp2" }
+...
+---
+name: test_sext_s8
+# CHECK-LABEL: name: test_sext_s8
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: %r0
+
+ %0(s8) = COPY %r0
+ %1(s32) = G_SEXT %0
+ ; G_SEXT with s8 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_SEXT {{%[0-9]+}}
+ %r0 = COPY %1(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_zext_s16
+# CHECK-LABEL: name: test_zext_s16
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: %r0
+
+ %0(s16) = COPY %r0
+ %1(s32) = G_ZEXT %0
+ ; G_ZEXT with s16 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_ZEXT {{%[0-9]+}}
+ %r0 = COPY %1(s32)
+ BX_RET 14, _, implicit %r0
...
---
name: test_add_s8
@@ -104,8 +162,179 @@ body: |
; This is legal, so we should find it unchanged in the output
; CHECK: [[FIVREG:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FRAME_INDEX]]
- ; CHECK: {{%[0-9]+}}(s32) = G_LOAD [[FIVREG]](p0)
+ ; CHECK: {{%[0-9]+}}(s32) = G_LOAD [[FIVREG]](p0) :: (load 4)
%0(p0) = G_FRAME_INDEX %fixed-stack.2
- %1(s32) = G_LOAD %0(p0)
+ %1(s32) = G_LOAD %0(p0) :: (load 4)
+ BX_RET 14, _
+...
+---
+name: test_legal_loads
+# CHECK-LABEL: name: test_legal_loads
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3
+
+ ; These are all legal, so we should find them unchanged in the output
+ ; CHECK-DAG: {{%[0-9]+}}(s64) = G_LOAD %0
+ ; CHECK-DAG: {{%[0-9]+}}(s32) = G_LOAD %0
+ ; CHECK-DAG: {{%[0-9]+}}(s16) = G_LOAD %0
+ ; CHECK-DAG: {{%[0-9]+}}(s8) = G_LOAD %0
+ ; CHECK-DAG: {{%[0-9]+}}(s1) = G_LOAD %0
+ ; CHECK-DAG: {{%[0-9]+}}(p0) = G_LOAD %0
+ %0(p0) = COPY %r0
+ %1(s32) = G_LOAD %0(p0) :: (load 4)
+ %2(s16) = G_LOAD %0(p0) :: (load 2)
+ %3(s8) = G_LOAD %0(p0) :: (load 1)
+ %4(s1) = G_LOAD %0(p0) :: (load 1)
+ %5(p0) = G_LOAD %0(p0) :: (load 4)
+ %6(s64) = G_LOAD %0(p0) :: (load 8)
+ BX_RET 14, _
+...
+---
+name: test_legal_stores
+# CHECK-LABEL: name: test_legal_stores
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3, %r4, %r5, %r6, %d1
+
+ ; These are all legal, so we should find them unchanged in the output
+ ; CHECK-DAG: G_STORE {{%[0-9]+}}(s64), %0(p0)
+ ; CHECK-DAG: G_STORE {{%[0-9]+}}(s32), %0(p0)
+ ; CHECK-DAG: G_STORE {{%[0-9]+}}(s16), %0(p0)
+ ; CHECK-DAG: G_STORE {{%[0-9]+}}(s8), %0(p0)
+ ; CHECK-DAG: G_STORE {{%[0-9]+}}(s1), %0(p0)
+ ; CHECK-DAG: G_STORE {{%[0-9]+}}(p0), %0(p0)
+ %0(p0) = COPY %r0
+ %1(s64) = COPY %d1
+ G_STORE %1(s64), %0(p0) :: (store 8)
+ %2(s32) = COPY %r2
+ G_STORE %2(s32), %0(p0) :: (store 4)
+ %3(s16) = COPY %r3
+ G_STORE %3(s16), %0(p0) :: (store 2)
+ %4(s8) = COPY %r4
+ G_STORE %4(s8), %0(p0) :: (store 1)
+ %5(s1) = COPY %r5
+ G_STORE %5(s1), %0(p0) :: (store 1)
+ %6(p0) = COPY %r6
+ G_STORE %6(p0), %0(p0) :: (store 4)
BX_RET 14, _
...
+---
+name: test_gep
+# CHECK-LABEL: name: test_gep
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(p0) = COPY %r0
+ %1(s32) = COPY %r1
+
+ ; CHECK: {{%[0-9]+}}(p0) = G_GEP {{%[0-9]+}}, {{%[0-9]+}}(s32)
+ %2(p0) = G_GEP %0, %1(s32)
+
+ %r0 = COPY %2(p0)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_constants
+# CHECK-LABEL: name: test_constants
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0:
+ %0(s32) = G_CONSTANT 42
+ ; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT 42
+
+ %r0 = COPY %0(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_fadd_s32
+# CHECK-LABEL: name: test_fadd_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_FADD %0, %1
+ ; G_FADD with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_FADD {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_fadd_s64
+# CHECK-LABEL: name: test_fadd_s64
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %d0, %d1
+
+ %0(s64) = COPY %d0
+ %1(s64) = COPY %d1
+ %2(s64) = G_FADD %0, %1
+ ; G_FADD with s64 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s64) = G_FADD {{%[0-9]+, %[0-9]+}}
+ %d0 = COPY %2(s64)
+ BX_RET 14, _, implicit %d0
+
+...
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
index ce0601021e62f..fbf8d81322f8f 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
@@ -3,6 +3,23 @@
define void @test_add_s32() { ret void }
define void @test_add_s16() { ret void }
define void @test_add_s8() { ret void }
+ define void @test_add_s1() { ret void }
+
+ define void @test_loads() #0 { ret void }
+ define void @test_stores() #0 { ret void }
+
+ define void @test_stack() { ret void }
+
+ define void @test_gep() { ret void }
+
+ define void @test_constants() { ret void }
+
+ define void @test_fadd_s32() #0 { ret void }
+ define void @test_fadd_s64() #0 { ret void }
+
+ define void @test_soft_fp_s64() #0 { ret void }
+
+ attributes #0 = { "target-features"="+vfp2"}
...
---
name: test_add_s32
@@ -82,3 +99,266 @@ body: |
BX_RET 14, _, implicit %r0
...
+---
+name: test_add_s1
+# CHECK-LABEL: name: test_add_s1
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s1) = COPY %r0
+ %1(s1) = COPY %r1
+ %2(s1) = G_ADD %0, %1
+ %r0 = COPY %2(s1)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_loads
+# CHECK-LABEL: name: test_loads
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 3, class: gprb }
+# CHECK: - { id: 4, class: gprb }
+# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 6, class: fprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+body: |
+ bb.0:
+ liveins: %r0
+ %0(p0) = COPY %r0
+ %6(s64) = G_LOAD %0 :: (load 8)
+ %1(s32) = G_LOAD %0 :: (load 4)
+ %2(s16) = G_LOAD %0 :: (load 2)
+ %3(s8) = G_LOAD %0 :: (load 1)
+ %4(s1) = G_LOAD %0 :: (load 1)
+ %5(p0) = G_LOAD %0 :: (load 4)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_stores
+# CHECK-LABEL: name: test_stores
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 3, class: gprb }
+# CHECK: - { id: 4, class: gprb }
+# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 6, class: fprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+ - { id: 6, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2, %r3, %r4, %r5, %d6
+ %0(p0) = COPY %r0
+ %1(s32) = COPY %r1
+ G_STORE %1(s32), %0 :: (store 4)
+ %2(s16) = COPY %r2
+ G_STORE %2(s16), %0 :: (store 2)
+ %3(s8) = COPY %r3
+ G_STORE %3(s8), %0 :: (store 1)
+ %4(s1) = COPY %r4
+ G_STORE %4(s1), %0 :: (store 1)
+ %5(p0) = COPY %r5
+ G_STORE %5(p0), %0 :: (store 4)
+ %6(s64) = COPY %d6
+ G_STORE %6(s64), %0 :: (store 8)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_stack
+# CHECK-LABEL: name: test_stack
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 3, class: gprb }
+# CHECK: - { id: 4, class: gprb }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+fixedStack:
+ - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+body: |
+ bb.0:
+ %0(p0) = G_FRAME_INDEX %fixed-stack.0
+ %1(s32) = G_LOAD %0(p0) :: (load 4 from %fixed-stack.0, align 0)
+
+ %2(p0) = COPY %sp
+ %3(s32) = G_CONSTANT i32 8
+ %4(p0) = G_GEP %2, %3(s32)
+ G_STORE %1(s32), %4(p0) :: (store 4)
+
+ BX_RET 14, _
+
+...
+---
+name: test_gep
+# CHECK-LABEL: name: test_gep
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(p0) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(p0) = G_GEP %0, %1(s32)
+ %r0 = COPY %2(p0)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_constants
+# CHECK-LABEL: name: test_constants
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0:
+ %0(s32) = G_CONSTANT 42
+ %r0 = COPY %0(s32)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_fadd_s32
+# CHECK-LABEL: name: test_fadd_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: fprb }
+# CHECK: - { id: 1, class: fprb }
+# CHECK: - { id: 2, class: fprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %s0, %s1
+
+ %0(s32) = COPY %s0
+ %1(s32) = COPY %s1
+ %2(s32) = G_FADD %0, %1
+ %s0 = COPY %2(s32)
+ BX_RET 14, _, implicit %s0
+
+...
+---
+name: test_fadd_s64
+# CHECK-LABEL: name: test_fadd_s64
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: fprb }
+# CHECK: - { id: 1, class: fprb }
+# CHECK: - { id: 2, class: fprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %d0, %d1
+
+ %0(s64) = COPY %d0
+ %1(s64) = COPY %d1
+ %2(s64) = G_FADD %0, %1
+ %d0 = COPY %2(s64)
+ BX_RET 14, _, implicit %d0
+
+...
+---
+name: test_soft_fp_s64
+# CHECK-LABEL: name: test_soft_fp_s64
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 2, class: fprb }
+# CHECK: - { id: 3, class: gprb }
+# CHECK: - { id: 4, class: gprb }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
+ %3(s32) = G_EXTRACT %2(s64), 0
+ %4(s32) = G_EXTRACT %2(s64), 32
+ %r0 = COPY %3(s32)
+ %r1 = COPY %4(s32)
+ BX_RET 14, _, implicit %r0, implicit %r1
+
+...
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 7d37c83d74838..0e077b3aee5a1 100644
--- a/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN
-; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
; rdar://12713765
; When realign-stack is set to false, make sure we are not creating stack
@@ -8,29 +7,31 @@
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
entry:
-; NO-REALIGN-LABEL: test1
-; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
-; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: mov r[[R3:[0-9]+]], r[[R1]]
-; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]!
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]
-
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
+; CHECK-LABEL: test1
+; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
+; CHECK: add r[[R2:[0-9]+]], r1, #48
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
+; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: mov r[[R1:[0-9]+]], sp
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #48
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #32
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
@@ -41,32 +42,33 @@ entry:
define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
entry:
-; REALIGN-LABEL: test2
-; REALIGN: bfc sp, #0, #6
-; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
-; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: ldr r[[R1:[0-9]+]], [pc, r1]
+; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
+; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: mov r[[R1:[0-9]+]], sp
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #48
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: add r[[R1:[0-9]+]], r0, #32
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]!
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
-; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-
-; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
- %retval = alloca <16 x float>, align 16
+%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
%1 = load <16 x float>, <16 x float>* %retval
diff --git a/test/CodeGen/ARM/arg-copy-elide.ll b/test/CodeGen/ARM/arg-copy-elide.ll
new file mode 100644
index 0000000000000..739b560b0833f
--- /dev/null
+++ b/test/CodeGen/ARM/arg-copy-elide.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s
+
+declare arm_aapcscc void @addrof_i32(i32*)
+declare arm_aapcscc void @addrof_i64(i64*)
+
+define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret void
+}
+
+; CHECK-LABEL: simple:
+; CHECK: push {r11, lr}
+; CHECK: add r0, sp, #8
+; CHECK: bl addrof_i32
+; CHECK: pop {r11, pc}
+
+
+; We need to load %x before calling addrof_i32 now because it could mutate %x in
+; place.
+
+define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret i32 %x
+}
+
+; CHECK-LABEL: use_arg:
+; CHECK: push {[[csr:[^ ]*]], lr}
+; CHECK: ldr [[csr]], [sp, #8]
+; CHECK: add r0, sp, #8
+; CHECK: bl addrof_i32
+; CHECK: mov r0, [[csr]]
+; CHECK: pop {[[csr]], pc}
+
+
+define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) {
+entry:
+ %x.addr = alloca i64, align 4
+ store i64 %x, i64* %x.addr, align 4
+ call void @addrof_i64(i64* %x.addr)
+ ret i64 %x
+}
+
+; CHECK-LABEL: split_i64:
+; CHECK: push {r4, r5, r11, lr}
+; CHECK: sub sp, sp, #8
+; CHECK: ldr r4, [sp, #28]
+; CHECK: ldr r5, [sp, #24]
+; CHECK: mov r0, sp
+; CHECK: str r4, [sp, #4]
+; CHECK: str r5, [sp]
+; CHECK: bl addrof_i64
+; CHECK: mov r0, r5
+; CHECK: mov r1, r4
+; CHECK: add sp, sp, #8
+; CHECK: pop {r4, r5, r11, pc}
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 9bd2077e4d037..31691e9468c9e 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -1,7 +1,6 @@
; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s
; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s
-; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
-; RUN: | FileCheck -check-prefix=T2 %s
+; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck -check-prefix=T2 %s
; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s
; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
@@ -49,9 +48,9 @@ tailrecurse.switch: ; preds = %tailrecurse
; V8-NEXT: beq
; V8-NEXT: %tailrecurse.switch
; V8: cmp
-; V8-NEXT: bne
-; V8-NEXT: b
-; The trailing space in the last line checks that the branch is unconditional
+; V8-NEXT: beq
+; V8-NEXT: %sw.epilog
+; V8-NEXT: bx lr
switch i32 %and, label %sw.epilog [
i32 1, label %sw.bb
i32 3, label %sw.bb6
@@ -93,7 +92,7 @@ entry:
%1 = load i8, i8* %0, align 1
%2 = zext i8 %1 to i32
; ARM: ands
-; THUMB: ands
+; THUMB: ands
; T2: ands
; V8: ands
; V8-NEXT: beq
@@ -141,19 +140,48 @@ return: ; preds = %bb2, %bb, %entry
; folding of unrelated tests (in this case, a TST against r1 was eliminated in
; favour of an AND of r0).
+define i32 @test_tst_assessment(i32 %a, i32 %b) {
; ARM-LABEL: test_tst_assessment:
+; ARM: @ BB#0:
+; ARM-NEXT: and r0, r0, #1
+; ARM-NEXT: tst r1, #1
+; ARM-NEXT: subne r0, r0, #1
+; ARM-NEXT: mov pc, lr
+;
; THUMB-LABEL: test_tst_assessment:
+; THUMB: @ BB#0:
+; THUMB-NEXT: movs r2, r0
+; THUMB-NEXT: movs r0, #1
+; THUMB-NEXT: ands r0, r2
+; THUMB-NEXT: subs r2, r0, #1
+; THUMB-NEXT: lsls r1, r1, #31
+; THUMB-NEXT: beq .LBB2_2
+; THUMB-NEXT: @ BB#1:
+; THUMB-NEXT: movs r0, r2
+; THUMB-NEXT: .LBB2_2:
+; THUMB-NEXT: bx lr
+;
; T2-LABEL: test_tst_assessment:
+; T2: @ BB#0:
+; T2-NEXT: lsls r1, r1, #31
+; T2-NEXT: and r0, r0, #1
+; T2-NEXT: it ne
+; T2-NEXT: subne r0, #1
+; T2-NEXT: bx lr
+;
; V8-LABEL: test_tst_assessment:
-define i32 @test_tst_assessment(i1 %lhs, i1 %rhs) {
- %lhs32 = zext i1 %lhs to i32
- %rhs32 = zext i1 %rhs to i32
- %diff = sub nsw i32 %lhs32, %rhs32
-; ARM: tst r1, #1
-; THUMB: lsls r1, r1, #31
-; T2: lsls r1, r1, #31
-; V8: lsls r1, r1, #31
- ret i32 %diff
+; V8: @ BB#0:
+; V8-NEXT: lsls r1, r1, #31
+; V8-NEXT: and r0, r0, #1
+; V8-NEXT: it ne
+; V8-NEXT: subne r0, #1
+; V8-NEXT: bx lr
+ %and1 = and i32 %a, 1
+ %sub = sub i32 %and1, 1
+ %and2 = and i32 %b, 1
+ %cmp = icmp eq i32 %and2, 0
+ %sel = select i1 %cmp, i32 %and1, i32 %sub
+ ret i32 %sel
}
!1 = !{!"branch_weights", i32 1, i32 1, i32 3, i32 2 }
diff --git a/test/CodeGen/ARM/arm-position-independence.ll b/test/CodeGen/ARM/arm-position-independence.ll
index 02a63984ad6f4..4aa817f7a4814 100644
--- a/test/CodeGen/ARM/arm-position-independence.ll
+++ b/test/CodeGen/ARM/arm-position-independence.ll
@@ -13,6 +13,12 @@
; RUN: llc -relocation-model=rwpi -mtriple=thumbv6m--none-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1_RO_ABS --check-prefix=THUMB1_RW_SB
; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv6m--none-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1_RO_PC --check-prefix=THUMB1_RW_SB
+; RUN: llc -relocation-model=rwpi -mtriple=armv7a--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_ARM_RO_ABS --check-prefix=NO_MOVT_ARM_RW_SB
+; RUN: llc -relocation-model=ropi-rwpi -mtriple=armv7a--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_ARM_RO_PC --check-prefix=NO_MOVT_ARM_RW_SB
+
+; RUN: llc -relocation-model=rwpi -mtriple=thumbv7m--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_THUMB2_RO_ABS --check-prefix=NO_MOVT_THUMB2_RW_SB
+; RUN: llc -relocation-model=ropi-rwpi -mtriple=thumbv7m--none-eabi -mattr=no-movt < %s | FileCheck %s --check-prefix=CHECK --check-prefix=NO_MOVT_THUMB2_RO_PC --check-prefix=NO_MOVT_THUMB2_RW_SB
+
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@a = external global i32, align 4
@@ -28,16 +34,24 @@ entry:
; ARM_RW_ABS: movt r[[REG]], :upper16:a
; ARM_RW_ABS: ldr r0, [r[[REG]]]
-; ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; ARM_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel)
+; ARM_RW_SB: movt r[[REG]], :upper16:a(sbrel)
; ARM_RW_SB: ldr r0, [r9, r[[REG]]]
+; NO_MOVT_ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RW_SB: ldr r0, [r9, r[[REG]]]
+
; THUMB2_RW_ABS: movw r[[REG:[0-9]]], :lower16:a
; THUMB2_RW_ABS: movt r[[REG]], :upper16:a
; THUMB2_RW_ABS: ldr r0, [r[[REG]]]
-; THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; THUMB2_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel)
+; THUMB2_RW_SB: movt r[[REG]], :upper16:a(sbrel)
; THUMB2_RW_SB: ldr.w r0, [r9, r[[REG]]]
+; NO_MOVT_THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RW_SB: ldr.w r0, [r9, r[[REG]]]
+
; THUMB1_RW_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
; THUMB1_RW_ABS: ldr r0, [r[[REG]]]
@@ -47,11 +61,11 @@ entry:
; CHECK: {{(bx lr|pop)}}
-; ARM_RW_SB: [[LCPI]]
-; ARM_RW_SB: .long a(sbrel)
+; NO_MOVT_ARM_RW_SB: [[LCPI]]
+; NO_MOVT_ARM_RW_SB: .long a(sbrel)
-; THUMB2_RW_SB: [[LCPI]]
-; THUMB2_RW_SB: .long a(sbrel)
+; NO_MOVT_THUMB2_RW_SB: [[LCPI]]
+; NO_MOVT_THUMB2_RW_SB: .long a(sbrel)
; THUMB1_RW_ABS: [[LCPI]]
; THUMB1_RW_ABS-NEXT: .long a
@@ -70,16 +84,24 @@ entry:
; ARM_RW_ABS: movt r[[REG]], :upper16:a
; ARM_RW_ABS: str r0, [r[[REG:[0-9]]]]
-; ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
-; ARM_RW_SB: str r0, [r9, r[[REG]]]
+; ARM_RW_SB: movw r[[REG:[0-9]]], :lower16:a
+; ARM_RW_SB: movt r[[REG]], :upper16:a
+; ARM_RW_SB: str r0, [r9, r[[REG:[0-9]]]]
+
+; NO_MOVT_ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RW_SB: str r0, [r9, r[[REG]]]
; THUMB2_RW_ABS: movw r[[REG:[0-9]]], :lower16:a
; THUMB2_RW_ABS: movt r[[REG]], :upper16:a
; THUMB2_RW_ABS: str r0, [r[[REG]]]
-; THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; THUMB2_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel)
+; THUMB2_RW_SB: movt r[[REG]], :upper16:a(sbrel)
; THUMB2_RW_SB: str.w r0, [r9, r[[REG]]]
+; NO_MOVT_THUMB2_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RW_SB: str.w r0, [r9, r[[REG]]]
+
; THUMB1_RW_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
; THUMB1_RW_ABS: str r0, [r[[REG]]]
@@ -89,11 +111,11 @@ entry:
; CHECK: {{(bx lr|pop)}}
-; ARM_RW_SB: [[LCPI]]
-; ARM_RW_SB: .long a(sbrel)
+; NO_MOVT_ARM_RW_SB: [[LCPI]]
+; NO_MOVT_ARM_RW_SB: .long a(sbrel)
-; THUMB2_RW_SB: [[LCPI]]
-; THUMB2_RW_SB: .long a(sbrel)
+; NO_MOVT_THUMB2_RW_SB: [[LCPI]]
+; NO_MOVT_THUMB2_RW_SB: .long a(sbrel)
; THUMB1_RW_ABS: [[LCPI]]
; THUMB1_RW_ABS-NEXT: .long a
@@ -112,21 +134,37 @@ entry:
; ARM_RO_ABS: movt r[[reg]], :upper16:b
; ARM_RO_ABS: ldr r0, [r[[reg]]]
+; NO_MOVT_ARM_RO_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RO_ABS: ldr r0, [r[[REG]]]
+
; ARM_RO_PC: movw r[[REG:[0-9]]], :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+8))
; ARM_RO_PC: movt r[[REG]], :upper16:(b-([[LPC]]+8))
; ARM_RO_PC: [[LPC]]:
; ARM_RO_PC-NEXT: ldr r0, [pc, r[[REG]]]
+; NO_MOVT_ARM_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]:
+; NO_MOVT_ARM_RO_PC: ldr r0, [pc, r[[REG]]]
+
; THUMB2_RO_ABS: movw r[[REG:[0-9]]], :lower16:b
; THUMB2_RO_ABS: movt r[[REG]], :upper16:b
; THUMB2_RO_ABS: ldr r0, [r[[REG]]]
+; NO_MOVT_THUMB2_RO_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RO_ABS: ldr r0, [r[[REG]]]
+
; THUMB2_RO_PC: movw r[[REG:[0-9]]], :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+4))
; THUMB2_RO_PC: movt r[[REG]], :upper16:(b-([[LPC]]+4))
; THUMB2_RO_PC: [[LPC]]:
; THUMB2_RO_PC-NEXT: add r[[REG]], pc
; THUMB2_RO_PC: ldr r0, [r[[REG]]]
+; NO_MOVT_THUMB2_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]:
+; NO_MOVT_THUMB2_RO_PC-NEXT: add r[[REG]], pc
+; NO_MOVT_THUMB2_RO_PC: ldr r0, [r[[REG]]]
+
+
; THUMB1_RO_ABS: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
; THUMB1_RO_ABS: ldr r0, [r[[REG]]]
@@ -137,9 +175,21 @@ entry:
; CHECK: {{(bx lr|pop)}}
+; NO_MOVT_ARM_RO_ABS: [[LCPI]]
+; NO_MOVT_ARM_RO_ABS-NEXT: .long b
+
+; NO_MOVT_THUMB2_RO_ABS: [[LCPI]]
+; NO_MOVT_THUMB2_RO_ABS-NEXT: .long b
+
; THUMB1_RO_ABS: [[LCPI]]
; THUMB1_RO_ABS-NEXT: .long b
+; NO_MOVT_ARM_RO_PC: [[LCPI]]
+; NO_MOVT_ARM_RO_PC-NEXT: .long b-([[LPC]]+8)
+
+; NO_MOVT_THUMB2_RO_PC: [[LCPI]]
+; NO_MOVT_THUMB2_RO_PC-NEXT: .long b-([[LPC]]+4)
+
; THUMB1_RO_PC: [[LCPI]]
; THUMB1_RO_PC-NEXT: .long b-([[LPC]]+4)
}
@@ -152,15 +202,23 @@ entry:
; ARM_RW_ABS: movw r[[REG:[0-9]]], :lower16:a
; ARM_RW_ABS: movt r[[REG]], :upper16:a
-; ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; ARM_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel)
+; ARM_RW_SB: movt r[[REG]], :upper16:a(sbrel)
; ARM_RW_SB: add r0, r9, r[[REG]]
+; NO_MOVT_ARM_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RW_SB: add r0, r9, r[[REG]]
+
; THUMB2_RW_ABS: movw r[[REG:[0-9]]], :lower16:a
; THUMB2_RW_ABS: movt r[[REG]], :upper16:a
-; THUMB2_RW_SB: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; THUMB2_RW_SB: movw r[[REG:[0-9]]], :lower16:a(sbrel)
+; THUMB2_RW_SB: movt r[[REG]], :upper16:a(sbrel)
; THUMB2_RW_SB: add r0, r9
+; NO_MOVT_THUMB2_RW_SB: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RW_SB: add r0, r9
+
; THUMB1_RW_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
; THUMB1_RW_SB: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
@@ -169,11 +227,11 @@ entry:
; CHECK: {{(bx lr|pop)}}
-; ARM_RW_SB: [[LCPI]]
-; ARM_RW_SB: .long a(sbrel)
+; NO_MOVT_ARM_RW_SB: [[LCPI]]
+; NO_MOVT_ARM_RW_SB: .long a(sbrel)
-; THUMB2_RW_SB: [[LCPI]]
-; THUMB2_RW_SB: .long a(sbrel)
+; NO_MOVT_THUMB2_RW_SB: [[LCPI]]
+; NO_MOVT_THUMB2_RW_SB: .long a(sbrel)
; THUMB1_RW_ABS: [[LCPI]]
; THUMB1_RW_ABS-NEXT: .long a
@@ -190,19 +248,31 @@ entry:
; ARM_RO_ABS: movw r[[REG:[0-9]]], :lower16:b
; ARM_RO_ABS: movt r[[REG]], :upper16:b
+; NO_MOVT_ARM_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
+
; ARM_RO_PC: movw r[[REG:[0-9]]], :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+8))
; ARM_RO_PC: movt r[[REG]], :upper16:(b-([[LPC]]+8))
; ARM_RO_PC: [[LPC]]:
; ARM_RO_PC-NEXT: add r0, pc, r[[REG:[0-9]]]
+; NO_MOVT_ARM_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]:
+; NO_MOVT_ARM_RO_PC-NEXT: add r0, pc, r[[REG]]
+
; THUMB2_RO_ABS: movw r[[REG:[0-9]]], :lower16:b
; THUMB2_RO_ABS: movt r[[REG]], :upper16:b
+; NO_MOVT_THUMB2_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
+
; THUMB2_RO_PC: movw r0, :lower16:(b-([[LPC:.LPC[0-9]+_[0-9]+]]+4))
; THUMB2_RO_PC: movt r0, :upper16:(b-([[LPC]]+4))
; THUMB2_RO_PC: [[LPC]]:
; THUMB2_RO_PC-NEXT: add r0, pc
+; NO_MOVT_THUMB2_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]:
+; NO_MOVT_THUMB2_RO_PC-NEXT: add r[[REG]], pc
+
; THUMB1_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
; THUMB1_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
@@ -211,9 +281,21 @@ entry:
; CHECK: {{(bx lr|pop)}}
+; NO_MOVT_ARM_RO_ABS: [[LCPI]]
+; NO_MOVT_ARM_RO_ABS-NEXT: .long b
+
+; NO_MOVT_THUMB2_RO_ABS: [[LCPI]]
+; NO_MOVT_THUMB2_RO_ABS-NEXT: .long b
+
; THUMB1_RO_ABS: [[LCPI]]
; THUMB1_RO_ABS-NEXT: .long b
+; NO_MOVT_ARM_RO_PC: [[LCPI]]
+; NO_MOVT_ARM_RO_PC-NEXT: .long b-([[LPC]]+8)
+
+; NO_MOVT_THUMB2_RO_PC: [[LCPI]]
+; NO_MOVT_THUMB2_RO_PC-NEXT: .long b-([[LPC]]+4)
+
; THUMB1_RO_PC: [[LCPI]]
; THUMB1_RO_PC-NEXT: .long b-([[LPC]]+4)
}
@@ -226,19 +308,31 @@ entry:
; ARM_RO_ABS: movw r[[REG:[0-9]]], :lower16:take_addr_func
; ARM_RO_ABS: movt r[[REG]], :upper16:take_addr_func
+; NO_MOVT_ARM_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
+
; ARM_RO_PC: movw r[[REG:[0-9]]], :lower16:(take_addr_func-([[LPC:.LPC[0-9]+_[0-9]+]]+8))
; ARM_RO_PC: movt r[[REG]], :upper16:(take_addr_func-([[LPC]]+8))
; ARM_RO_PC: [[LPC]]:
; ARM_RO_PC-NEXT: add r0, pc, r[[REG:[0-9]]]
+; NO_MOVT_ARM_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_ARM_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]:
+; NO_MOVT_ARM_RO_PC-NEXT: add r0, pc, r[[REG]]
+
; THUMB2_RO_ABS: movw r[[REG:[0-9]]], :lower16:take_addr_func
; THUMB2_RO_ABS: movt r[[REG]], :upper16:take_addr_func
+; NO_MOVT_THUMB2_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
+
; THUMB2_RO_PC: movw r0, :lower16:(take_addr_func-([[LPC:.LPC[0-9]+_[0-9]+]]+4))
; THUMB2_RO_PC: movt r0, :upper16:(take_addr_func-([[LPC]]+4))
; THUMB2_RO_PC: [[LPC]]:
; THUMB2_RO_PC-NEXT: add r0, pc
+; NO_MOVT_THUMB2_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
+; NO_MOVT_THUMB2_RO_PC: [[LPC:.LPC[0-9]+_[0-9]+]]:
+; NO_MOVT_THUMB2_RO_PC-NEXT: add r[[REG]], pc
+
; THUMB1_RO_ABS: ldr r0, [[LCPI:.LCPI[0-9]+_[0-9]+]]
; THUMB1_RO_PC: ldr r[[REG:[0-9]]], [[LCPI:.LCPI[0-9]+_[0-9]+]]
@@ -247,9 +341,21 @@ entry:
; CHECK: {{(bx lr|pop)}}
+; NO_MOVT_ARM_RO_ABS: [[LCPI]]
+; NO_MOVT_ARM_RO_ABS-NEXT: .long take_addr_func
+
+; NO_MOVT_THUMB2_RO_ABS: [[LCPI]]
+; NO_MOVT_THUMB2_RO_ABS-NEXT: .long take_addr_func
+
; THUMB1_RO_ABS: [[LCPI]]
; THUMB1_RO_ABS-NEXT: .long take_addr_func
+; NO_MOVT_ARM_RO_PC: [[LCPI]]
+; NO_MOVT_ARM_RO_PC-NEXT: .long take_addr_func-([[LPC]]+8)
+
+; NO_MOVT_THUMB2_RO_PC: [[LCPI]]
+; NO_MOVT_THUMB2_RO_PC-NEXT: .long take_addr_func-([[LPC]]+4)
+
; THUMB1_RO_PC: [[LCPI]]
; THUMB1_RO_PC-NEXT: .long take_addr_func-([[LPC]]+4)
}
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index 364bd5d13691e..e026bae361e19 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -24,14 +24,12 @@ entry:
; CHECK-THUMB-LABEL: test_cmpxchg_res_i8
; CHECK-THUMB: bl __sync_val_compare_and_swap_1
; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0
-; CHECK-THUMB: push {r0}
-; CHECK-THUMB: pop {[[R1:r[0-7]]]}
+; CHECK-THUMB: movs [[R1:r[0-7]]], r0
; CHECK-THUMB: movs r0, #1
; CHECK-THUMB: movs [[R2:r[0-9]+]], #0
; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}}
; CHECK-THUMB: beq
-; CHECK-THUMB: push {[[R2]]}
-; CHECK-THUMB: pop {r0}
+; CHECK-THUMB: movs r0, [[R2]]
; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8:
; CHECK-ARMV6-NEXT: .fnstart
@@ -66,14 +64,14 @@ entry:
; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: moveq r0, #1
; CHECK-ARMV7-NEXT: bxeq lr
; CHECK-ARMV7-NEXT: [[TRY]]:
-; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
; CHECK-ARMV7-NEXT: beq [[HEAD]]
; CHECK-ARMV7-NEXT: clrex
-; CHECK-ARMV7-NEXT: mov [[RES]], #0
+; CHECK-ARMV7-NEXT: mov r0, #0
; CHECK-ARMV7-NEXT: bx lr
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index e6a4949d53ce4..23c4ccea46046 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -320,10 +320,10 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK: cmp [[SUCCESS]], #0
; CHECK: bne [[LOOP_BB]]
-; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ish
+; CHECK: bx lr
; CHECK: [[FAIL_BB]]:
; CHECK-NEXT: clrex
-; CHECK-NEXT: [[END_BB]]:
; CHECK: dmb ish
; CHECK: bx lr
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 77b850bd617b8..d1575ed12e4e1 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1045,20 +1045,21 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind
; function there.
; CHECK-ARM-NEXT: cmp r[[OLD]], r0
; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
-; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
-; CHECK-NEXT: b .LBB{{[0-9]+}}_4
-; CHECK-NEXT: .LBB{{[0-9]+}}_3:
-; CHECK-NEXT: clrex
+; CHECK-ARM: mov r0, r[[OLD]]
+; CHECK: bx lr
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
+; CHECK-NEXT: clrex
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK-ARM: mov r0, r[[OLD]]
+; CHECK-ARM-NEXT: bx lr
ret i8 %old
}
@@ -1078,20 +1079,21 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw
; function there.
; CHECK-ARM-NEXT: cmp r[[OLD]], r0
; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
-; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
-; CHECK-NEXT: b .LBB{{[0-9]+}}_4
-; CHECK-NEXT: .LBB{{[0-9]+}}_3:
-; CHECK-NEXT: clrex
+; CHECK-ARM: mov r0, r[[OLD]]
+; CHECK: bx lr
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
+; CHECK-NEXT: clrex
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK-ARM: mov r0, r[[OLD]]
+; CHECK-ARM-NEXT: bx lr
ret i16 %old
}
@@ -1110,20 +1112,21 @@ define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp r[[OLD]], r0
-; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
-; CHECK-NEXT: b .LBB{{[0-9]+}}_4
-; CHECK-NEXT: .LBB{{[0-9]+}}_3:
-; CHECK-NEXT: clrex
+; CHECK: str{{(.w)?}} r[[OLD]],
+; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
+; CHECK-NEXT: clrex
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: str{{(.w)?}} r[[OLD]],
+; CHECK-ARM-NEXT: bx lr
ret void
}
@@ -1148,16 +1151,16 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
-; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
; CHECK-NEXT: BB#2:
; As above, r2, r3 is a reasonable guess.
; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
-; CHECK-NEXT: b .LBB{{[0-9]+}}_4
-; CHECK-NEXT: .LBB{{[0-9]+}}_3:
-; CHECK-NEXT: clrex
+; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+; CHECK-NEXT: pop
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
+; CHECK-NEXT: clrex
; CHECK-NOT: dmb
; CHECK-NOT: mcr
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 893fef3add7e1..31eff16fcc3c4 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -77,7 +77,7 @@ entry:
define i32 @f7(i32 %x, i32 %y) {
; CHECK-LABEL: f7:
-; CHECK: bfi r1, r0, #4, #1
+; CHECK: bfi r0, r2, #4, #1
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
%and = and i32 %x, 4
%or = or i32 %y2, 16
@@ -88,8 +88,8 @@ define i32 @f7(i32 %x, i32 %y) {
define i32 @f8(i32 %x, i32 %y) {
; CHECK-LABEL: f8:
-; CHECK: bfi r1, r0, #4, #1
-; CHECK: bfi r1, r0, #5, #1
+; CHECK: bfi r0, r2, #4, #1
+; CHECK: bfi r0, r2, #5, #1
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
%and = and i32 %x, 4
%or = or i32 %y2, 48
@@ -111,7 +111,7 @@ define i32 @f9(i32 %x, i32 %y) {
define i32 @f10(i32 %x, i32 %y) {
; CHECK-LABEL: f10:
-; CHECK: bfi r1, r0, #4, #2
+; CHECK: bfi r0, r2, #4, #2
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
%and = and i32 %x, 4
%or = or i32 %y2, 32
@@ -128,7 +128,7 @@ define i32 @f10(i32 %x, i32 %y) {
define i32 @f11(i32 %x, i32 %y) {
; CHECK-LABEL: f11:
-; CHECK: bfi r1, r0, #4, #3
+; CHECK: bfi r0, r2, #4, #3
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
%and = and i32 %x, 4
%or = or i32 %y2, 32
@@ -150,7 +150,7 @@ define i32 @f11(i32 %x, i32 %y) {
define i32 @f12(i32 %x, i32 %y) {
; CHECK-LABEL: f12:
-; CHECK: bfi r1, r0, #4, #1
+; CHECK: bfi r0, r2, #4, #1
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
%and = and i32 %x, 4
%or = or i32 %y2, 16
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
index 691f8be4ab66b..8be59898bd0fe 100644
--- a/test/CodeGen/ARM/bic.ll
+++ b/test/CodeGen/ARM/bic.ll
@@ -1,17 +1,24 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: bic r0, r0, r1
%tmp = xor i32 %b, 4294967295
%tmp1 = and i32 %a, %tmp
ret i32 %tmp1
}
-; CHECK: bic r0, r0, r1
-
define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: bic r0, r0, r1
%tmp = xor i32 %b, 4294967295
%tmp1 = and i32 %tmp, %a
ret i32 %tmp1
}
-; CHECK: bic r0, r0, r1
+define i32 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: bic r0, r0, #255
+ %tmp = and i32 %a, -256
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll
new file mode 100644
index 0000000000000..fe43f1b2ef93d
--- /dev/null
+++ b/test/CodeGen/ARM/bool-ext-inc.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi -mattr=neon | FileCheck %s
+
+define i32 @sext_inc(i1 zeroext %x) {
+; CHECK-LABEL: sext_inc:
+; CHECK: @ BB#0:
+; CHECK-NEXT: rsb r0, r0, #1
+; CHECK-NEXT: mov pc, lr
+ %ext = sext i1 %x to i32
+ %add = add i32 %ext, 1
+ ret i32 %add
+}
+
+define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
+; CHECK-LABEL: sext_inc_vec:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov.i32 q9, #0x1f
+; CHECK-NEXT: vmov.i32 q10, #0x1
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vneg.s32 q9, q9
+; CHECK-NEXT: vshl.i32 q8, q8, #31
+; CHECK-NEXT: vshl.s32 q8, q8, q9
+; CHECK-NEXT: vadd.i32 q8, q8, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
+ %ext = sext <4 x i1> %x to <4 x i32>
+ %add = add <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %add
+}
+
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index b1b3b46dce249..fc85a3a2e6834 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -102,6 +102,10 @@
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=CORTEX-M23
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CORTEX-M33
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M33-FAST
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m33 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4 | FileCheck %s --check-prefix=CORTEX-R4
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4f | FileCheck %s --check-prefix=CORTEX-R4F
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
@@ -182,6 +186,8 @@
; ARMv7a
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; ARMv7ve
+; RUN: llc < %s -mtriple=armv7ve-none-linux-gnueabi | FileCheck %s --check-prefix=V7VE
; ARMv7r
; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
@@ -210,6 +216,12 @@
; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 -mattr=-neon,+fp-only-sp,+d16 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-SP
; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-NEON
+; ARMv8-M
+; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m23 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+
; XSCALE: .eabi_attribute 6, 5
; XSCALE: .eabi_attribute 8, 1
; XSCALE: .eabi_attribute 9, 1
@@ -369,6 +381,22 @@
; V7-FAST-NOT: .eabi_attribute 22
; V7-FAST: .eabi_attribute 23, 1
+; V7VE: .syntax unified
+; V7VE: .eabi_attribute 6, 10 @ Tag_CPU_arch
+; V7VE: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile
+; V7VE: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use
+; V7VE: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use
+; V7VE: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use
+; V7VE: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal
+; V7VE: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions
+; V7VE: .eabi_attribute 23, 3 @ Tag_ABI_FP_number_model
+; V7VE: .eabi_attribute 24, 1 @ Tag_ABI_align_needed
+; V7VE: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved
+; V7VE: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format
+; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use
+; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use
+; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use
+
; V8: .syntax unified
; V8: .eabi_attribute 67, "2.09"
; V8: .eabi_attribute 6, 14
@@ -1310,6 +1338,55 @@
; CORTEX-A32-FAST-NOT: .eabi_attribute 22
; CORTEX-A32-FAST: .eabi_attribute 23, 1
+; CORTEX-M23: .cpu cortex-m23
+; CORTEX-M23: .eabi_attribute 6, 16
+; CORTEX-M23: .eabi_attribute 7, 77
+; CORTEX-M23: .eabi_attribute 8, 0
+; CORTEX-M23: .eabi_attribute 9, 3
+; CORTEX-M23: .eabi_attribute 17, 1
+;; We default to IEEE 754 compliance
+; CORTEX-M23-NOT: .eabi_attribute 19
+; CORTEX-M23: .eabi_attribute 20, 1
+; CORTEX-M23: .eabi_attribute 21, 1
+; CORTEX-M23: .eabi_attribute 23, 3
+; CORTEX-M23: .eabi_attribute 34, 1
+; CORTEX-M23: .eabi_attribute 24, 1
+; CORTEX-M23-NOT: .eabi_attribute 27
+; CORTEX-M23-NOT: .eabi_attribute 28
+; CORTEX-M23: .eabi_attribute 25, 1
+; CORTEX-M23: .eabi_attribute 38, 1
+; CORTEX-M23: .eabi_attribute 14, 0
+; CORTEX-M23-NOT: .eabi_attribute 44
+
+; CORTEX-M33: .cpu cortex-m33
+; CORTEX-M33: .eabi_attribute 6, 17
+; CORTEX-M33: .eabi_attribute 7, 77
+; CORTEX-M33: .eabi_attribute 8, 0
+; CORTEX-M33: .eabi_attribute 9, 3
+; CORTEX-M33: .fpu fpv5-sp-d16
+; CORTEX-M33: .eabi_attribute 17, 1
+;; We default to IEEE 754 compliance
+; CORTEX-M33-NOT: .eabi_attribute 19
+; CORTEX-M33: .eabi_attribute 20, 1
+; CORTEX-M33: .eabi_attribute 21, 1
+; CORTEX-M33: .eabi_attribute 23, 3
+; CORTEX-M33: .eabi_attribute 34, 1
+; CORTEX-M33: .eabi_attribute 24, 1
+; CORTEX-M33: .eabi_attribute 25, 1
+; CORTEX-M33: .eabi_attribute 27, 1
+; CORTEX-M33-NOT: .eabi_attribute 28
+; CORTEX-M33: .eabi_attribute 36, 1
+; CORTEX-M33: .eabi_attribute 38, 1
+; CORTEX-M33: .eabi_attribute 46, 1
+; CORTEX-M33-NOT: .eabi_attribute 44
+; CORTEX-M33: .eabi_attribute 14, 0
+
+; CORTEX-M33-FAST-NOT: .eabi_attribute 19
+; CORTEX-M33-FAST: .eabi_attribute 20, 2
+; CORTEX-M33-FAST-NOT: .eabi_attribute 21
+; CORTEX-M33-FAST-NOT: .eabi_attribute 22
+; CORTEX-M33-FAST: .eabi_attribute 23, 1
+
; CORTEX-A35: .cpu cortex-a35
; CORTEX-A35: .eabi_attribute 6, 14
; CORTEX-A35: .eabi_attribute 7, 65
diff --git a/test/CodeGen/ARM/cmp1-peephole-thumb.mir b/test/CodeGen/ARM/cmp1-peephole-thumb.mir
new file mode 100644
index 0000000000000..5ace58fd06584
--- /dev/null
+++ b/test/CodeGen/ARM/cmp1-peephole-thumb.mir
@@ -0,0 +1,78 @@
+# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s
+
+--- |
+ ; ModuleID = '<stdin>'
+ source_filename = "<stdin>"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumb-none--eabi"
+
+ define i32 @f(i32 %a, i32 %b) {
+ entry:
+ %mul = mul nsw i32 %b, %a
+ %cmp = icmp eq i32 %mul, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+ }
+
+...
+---
+name: f
+# CHECK-LABEL: name: f
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: tgpr }
+ - { id: 1, class: tgpr }
+ - { id: 2, class: tgpr }
+ - { id: 3, class: tgpr }
+ - { id: 4, class: tgpr }
+ - { id: 5, class: tgpr }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+
+# CHECK: tMOVi8 1, 14, _
+# CHECK: tMOVi8 0, 14, _
+# CHECK: tMUL %1, %0, 14, _
+# CHECK-NOT: tCMPi8
+body: |
+ bb.0.entry:
+ successors: %bb.1.entry(0x40000000), %bb.2.entry(0x40000000)
+ liveins: %r0, %r1
+
+ %1 = COPY %r1
+ %0 = COPY %r0
+ %2, %cpsr = tMUL %1, %0, 14, _
+ %3, %cpsr = tMOVi8 1, 14, _
+ %4, %cpsr = tMOVi8 0, 14, _
+ tCMPi8 killed %2, 0, 14, _, implicit-def %cpsr
+ tBcc %bb.2.entry, 0, %cpsr
+
+ bb.1.entry:
+ successors: %bb.2.entry(0x80000000)
+
+
+ bb.2.entry:
+ %5 = PHI %4, %bb.1.entry, %3, %bb.0.entry
+ %r0 = COPY %5
+ tBX_RET 14, _, implicit %r0
+
+...
diff --git a/test/CodeGen/ARM/cmp2-peephole-thumb.mir b/test/CodeGen/ARM/cmp2-peephole-thumb.mir
new file mode 100644
index 0000000000000..6e9ca70f1741d
--- /dev/null
+++ b/test/CodeGen/ARM/cmp2-peephole-thumb.mir
@@ -0,0 +1,108 @@
+# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s
+
+# Here we check that the peephole cmp rewrite is not triggered, because
+# there is a store instruction between the tMUL and tCMP, i.e. there are
+# no constants to reorder.
+
+--- |
+ ; ModuleID = 'cmp2-peephole-thumb.ll'
+ source_filename = "<stdin>"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumb-none--eabi"
+
+ define i32 @g(i32 %a, i32 %b) {
+ entry:
+ %retval = alloca i32, align 4
+ %mul = alloca i32, align 4
+ %mul1 = mul nsw i32 %a, %b
+ store i32 %mul1, i32* %mul, align 4
+ %0 = load i32, i32* %mul, align 4
+ %cmp = icmp sle i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+ if.then: ; preds = %entry
+ store i32 42, i32* %retval, align 4
+ br label %return
+
+ if.end: ; preds = %entry
+ store i32 1, i32* %retval, align 4
+ br label %return
+
+ return: ; preds = %if.end, %if.then
+ %1 = load i32, i32* %retval, align 4
+ ret i32 %1
+ }
+
+...
+---
+name: g
+# CHECK-LABEL: name: g
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: tgpr }
+ - { id: 1, class: tgpr }
+ - { id: 2, class: tgpr }
+ - { id: 3, class: tgpr }
+ - { id: 4, class: tgpr }
+ - { id: 5, class: tgpr }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+stack:
+ - { id: 0, name: retval, offset: 0, size: 4, alignment: 4, local-offset: -4 }
+ - { id: 1, name: mul, offset: 0, size: 4, alignment: 4, local-offset: -8 }
+
+# CHECK: tMUL
+# CHECK-NEXT: tSTRspi
+# CHECK-NEXT: tCMPi8
+body: |
+ bb.0.entry:
+ successors: %bb.1.if.then(0x40000000), %bb.2.if.end(0x40000000)
+ liveins: %r0, %r1
+
+ %1 = COPY %r1
+ %0 = COPY %r0
+ %2, %cpsr = tMUL %0, %1, 14, _
+ tSTRspi %2, %stack.1.mul, 0, 14, _ :: (store 4 into %ir.mul)
+ tCMPi8 %2, 0, 14, _, implicit-def %cpsr
+ tBcc %bb.2.if.end, 12, %cpsr
+ tB %bb.1.if.then, 14, _
+
+ bb.1.if.then:
+ successors: %bb.3.return(0x80000000)
+
+ %4, %cpsr = tMOVi8 42, 14, _
+ tSTRspi killed %4, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval)
+ tB %bb.3.return, 14, _
+
+ bb.2.if.end:
+ successors: %bb.3.return(0x80000000)
+
+ %3, %cpsr = tMOVi8 1, 14, _
+ tSTRspi killed %3, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval)
+
+ bb.3.return:
+ %5 = tLDRspi %stack.0.retval, 0, 14, _ :: (dereferenceable load 4 from %ir.retval)
+ %r0 = COPY %5
+ tBX_RET 14, _, implicit %r0
+
+...
diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll
index 4038528c91bc8..0d5681aafbcb0 100644
--- a/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -13,14 +13,16 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) {
; CHECK-NEXT: dmb ish
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-NEXT: cmp [[SUCCESS]], #0
-; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: BB#2:
-; CHECK-NEXT: dmb ish
; CHECK-NEXT: str r3, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: [[LDFAILBB]]:
; CHECK-NEXT: clrex
-; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: str r3, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: [[SUCCESSBB]]:
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: str r3, [r0]
; CHECK-NEXT: bx lr
diff --git a/test/CodeGen/ARM/constantpool-promote.ll b/test/CodeGen/ARM/constantpool-promote.ll
index fb1bdfd62fb7c..8df7e100c0514 100644
--- a/test/CodeGen/ARM/constantpool-promote.ll
+++ b/test/CodeGen/ARM/constantpool-promote.ll
@@ -1,10 +1,15 @@
-; RUN: llc -relocation-model=static < %s | FileCheck %s
-; RUN: llc -relocation-model=pic < %s | FileCheck %s
-; RUN: llc -relocation-model=ropi < %s | FileCheck %s
-; RUN: llc -relocation-model=rwpi < %s | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
-target triple = "armv7--linux-gnueabihf"
+; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM
+; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM
+; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM
+; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM
+; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB
+; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB
+; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB
+; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB
+; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M
+; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M
+; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M
+; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M
@.str = private unnamed_addr constant [2 x i8] c"s\00", align 1
@.str1 = private unnamed_addr constant [69 x i8] c"this string is far too long to fit in a literal pool by far and away\00", align 1
@@ -16,6 +21,7 @@ target triple = "armv7--linux-gnueabihf"
@.arr3 = private unnamed_addr constant [2 x i16*] [i16* null, i16* null], align 4
@.ptr = private unnamed_addr constant [2 x i16*] [i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr2, i32 0, i32 0), i16* null], align 2
@.arr4 = private unnamed_addr constant [2 x i16] [i16 3, i16 4], align 16
+@.zerosize = private unnamed_addr constant [0 x i16] zeroinitializer, align 4
; CHECK-LABEL: @test1
; CHECK: adr r0, [[x:.*]]
@@ -134,18 +140,56 @@ define void @test9() #0 {
ret void
}
+; Ensure that zero sized values are supported / not promoted.
+; CHECK-LABEL: @pr32130
+; CHECK-NOT: adr
+define void @pr32130() #0 {
+ tail call void @c(i16* getelementptr inbounds ([0 x i16], [0 x i16]* @.zerosize, i32 0, i32 0)) #2
+ ret void
+}
+
+; CHECK-LABEL: @test10
+; CHECK-V6M: adr r{{[0-9]*}}, [[x:.*]]
+; CHECK-V6M: [[x]]:
+; CHECK-V6M: .asciz "s\000\000"
+; CHECK-V7: ldrb{{(.w)?}} r{{[0-9]*}}, [[x:.*]]
+; CHECK-V7: [[x]]:
+; CHECK-V7: .asciz "s\000\000"
+define void @test10(i8* %a) local_unnamed_addr #0 {
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: @test11
+; CHECK-V6M: adr r{{[0-9]*}}, [[x:.*]]
+; CHECK-V6M: [[x]]:
+; CHECK-V6M: .short 3
+; CHECK-V6M: .short 4
+; CHECK-V7THUMB: ldrh{{(.w)?}} r{{[0-9]*}}, [[x:.*]]
+; CHECK-V7THUMB: [[x]]:
+; CHECK-V7THUMB: .short 3
+; CHECK-V7THUMB: .short 4
+; CHECK-V7ARM: adr r{{[0-9]*}}, [[x:.*]]
+; CHECK-V7ARM: [[x]]:
+; CHECK-V7ARM: .short 3
+; CHECK-V7ARM: .short 4
+define void @test11(i16* %a) local_unnamed_addr #0 {
+ call void @llvm.memmove.p0i16.p0i16.i32(i16* %a, i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i32 2, i1 false)
+ ret void
+}
+
declare void @b(i8*) #1
declare void @c(i16*) #1
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) local_unnamed_addr
+declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i32, i1) local_unnamed_addr
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"min_enum_size", i32 4}
-!2 = !{!"Apple LLVM version 6.1.0 (clang-602.0.53) (based on LLVM 3.6.0svn)"}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 2987b9a2105aa..197746c5f122b 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -3,8 +3,6 @@
; Test dwarf reg no for s16
;CHECK: super-register DW_OP_regx
;CHECK-NEXT: 264
-;CHECK-NEXT: DW_OP_piece
-;CHECK-NEXT: 4
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-macosx10.6.7"
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index b31d1b7bed4f8..094b104997888 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -10,7 +10,7 @@ target triple = "thumbv7-apple-macosx10.6.7"
; CHECK: 0x00000000: Beginning address offset:
; CHECK-NEXT: Ending address offset:
-; CHECK-NEXT: Location description: 90 {{.. .. .. .. $}}
+; CHECK-NEXT: Location description: 90 {{.. .. $}}
define void @_Z3foov() optsize ssp !dbg !1 {
entry:
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 997f50760f3a9..8837315197554 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -10,12 +10,18 @@
; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV
; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 | \
; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=armv7ve-none-linux-gnu | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=thumbv7ve-none-linux-gnu | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV \
+; RUN: -check-prefix=CHECK-THUMB
define i32 @f1(i32 %a, i32 %b) {
entry:
; CHECK-LABEL: f1
; CHECK-SWDIV: __divsi3
+; CHECK-THUMB: .thumb_func
; CHECK-HWDIV: sdiv
; CHECK-EABI: __aeabi_idiv
@@ -28,6 +34,7 @@ entry:
; CHECK-LABEL: f2
; CHECK-SWDIV: __udivsi3
+; CHECK-THUMB: .thumb_func
; CHECK-HWDIV: udiv
; CHECK-EABI: __aeabi_uidiv
@@ -40,6 +47,7 @@ entry:
; CHECK-LABEL: f3
; CHECK-SWDIV: __modsi3
+; CHECK-THUMB: .thumb_func
; CHECK-HWDIV: sdiv
; CHECK-HWDIV: mls
@@ -55,6 +63,7 @@ entry:
; CHECK-LABEL: f4
; CHECK-SWDIV: __umodsi3
+; CHECK-THUMB: .thumb_func
; CHECK-HWDIV: udiv
; CHECK-HWDIV: mls
diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll
index 701884e926a89..71cd73a4a25d1 100644
--- a/test/CodeGen/ARM/fast-isel-align.ll
+++ b/test/CodeGen/ARM/fast-isel-align.ll
@@ -72,10 +72,10 @@ entry:
%4 = fcmp une float %3, 0.000000e+00
; ARM: ldr r[[R:[0-9]+]], [r0, #2]
; ARM: vmov s0, r[[R]]
-; ARM: vcmpe.f32 s0, #0
+; ARM: vcmp.f32 s0, #0
; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2]
; THUMB: vmov s0, r[[R]]
-; THUMB: vcmpe.f32 s0, #0
+; THUMB: vcmp.f32 s0, #0
ret i1 %4
}
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index a9d7e4580638e..543b6c285f3f7 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -7,8 +7,8 @@ entry:
; ARM: t1a
; THUMB: t1a
%cmp = fcmp oeq float %a, 0.000000e+00
-; ARM: vcmpe.f32 s{{[0-9]+}}, #0
-; THUMB: vcmpe.f32 s{{[0-9]+}}, #0
+; ARM: vcmp.f32 s{{[0-9]+}}, #0
+; THUMB: vcmp.f32 s{{[0-9]+}}, #0
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
@@ -28,9 +28,9 @@ entry:
; THUMB: t1b
%cmp = fcmp oeq float %a, -0.000000e+00
; ARM: vldr
-; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; ARM: vcmp.f32 s{{[0-9]+}}, s{{[0-9]+}}
; THUMB: vldr
-; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; THUMB: vcmp.f32 s{{[0-9]+}}, s{{[0-9]+}}
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
@@ -46,8 +46,8 @@ entry:
; ARM: t2a
; THUMB: t2a
%cmp = fcmp oeq double %a, 0.000000e+00
-; ARM: vcmpe.f64 d{{[0-9]+}}, #0
-; THUMB: vcmpe.f64 d{{[0-9]+}}, #0
+; ARM: vcmp.f64 d{{[0-9]+}}, #0
+; THUMB: vcmp.f64 d{{[0-9]+}}, #0
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
@@ -65,9 +65,9 @@ entry:
; THUMB: t2b
%cmp = fcmp oeq double %a, -0.000000e+00
; ARM: vldr
-; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; ARM: vcmp.f64 d{{[0-9]+}}, d{{[0-9]+}}
; THUMB: vldr
-; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; THUMB: vcmp.f64 d{{[0-9]+}}, d{{[0-9]+}}
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index 442459bc0582c..eb32ee54c0959 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -135,7 +135,7 @@ define void @test_fold_point(i1 %tst) minsize {
; Important to check for beginning of basic block, because if it gets
; if-converted the test is probably no longer checking what it should.
-; CHECK: {{LBB[0-9]+_2}}:
+; CHECK: %end
; CHECK-NEXT: vpop {d7, d8}
; CHECK-NEXT: pop {r4, pc}
diff --git a/test/CodeGen/ARM/fp-only-sp.ll b/test/CodeGen/ARM/fp-only-sp.ll
new file mode 100644
index 0000000000000..2c7b2acbde9c5
--- /dev/null
+++ b/test/CodeGen/ARM/fp-only-sp.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple=thumbv7em-apple-macho -mcpu=cortex-m4 %s -o - -O0 | FileCheck %s
+; RUN: llc -mtriple=thumbv7em-apple-macho -mcpu=cortex-m4 %s -o - | FileCheck %s
+
+; Note: vldr and vstr really do have 64-bit variants even with fp-only-sp
+define void @test_load_store(double* %addr) {
+; CHECK-LABEL: test_load_store:
+; CHECK: vldr [[TMP:d[0-9]+]], [r0]
+; CHECK: vstr [[TMP]], [r0]
+ %val = load volatile double, double* %addr
+ store volatile double %val, double* %addr
+ ret void
+}
+
+define void @test_cmp(double %l, double %r, i1* %addr.dst) {
+; CHECK-LABEL: test_cmp:
+; CHECK: bl ___eqdf2
+ %res = fcmp oeq double %l, %r
+ store i1 %res, i1* %addr.dst
+ ret void
+}
+
+define void @test_ext(float %in, double* %addr) {
+; CHECK-LABEL: test_ext:
+; CHECK: bl ___extendsfdf2
+ %res = fpext float %in to double
+ store double %res, double* %addr
+ ret void
+}
+
+define void @test_trunc(double %in, float* %addr) {
+; CHECK-LABEL: test_trunc:
+; CHECK: bl ___truncdfsf2
+ %res = fptrunc double %in to float
+ store float %res, float* %addr
+ ret void
+}
+
+define void @test_itofp(i32 %in, double* %addr) {
+; CHECK-LABEL: test_itofp:
+; CHECK: bl ___floatsidf
+ %res = sitofp i32 %in to double
+ store double %res, double* %addr
+; %res = fptoui double %tmp to i32
+ ret void
+}
+
+define i32 @test_fptoi(double* %addr) {
+; CHECK-LABEL: test_fptoi:
+; CHECK: bl ___fixunsdfsi
+ %val = load double, double* %addr
+ %res = fptoui double %val to i32
+ ret i32 %res
+}
+
+define void @test_binop(double* %addr) {
+; CHECK-LABEL: test_binop:
+; CHECK: bl ___adddf3
+ %in = load double, double* %addr
+ %res = fadd double %in, %in
+ store double %res, double* %addr
+ ret void
+}
diff --git a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll
index 8241236872873..9148ac109ae38 100644
--- a/test/CodeGen/ARM/fp16-promote.ll
+++ b/test/CodeGen/ARM/fp16-promote.ll
@@ -161,14 +161,14 @@ define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
ret void
}
-; Test only two variants of fcmp. These get translated to f32 vcmpe
+; Test only two variants of fcmp. These get translated to f32 vcmp
; instructions anyway.
; CHECK-ALL-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP: vcmpe.f32
+; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-ALL: movw{{ne|eq}}
@@ -184,7 +184,7 @@ define i1 @test_fcmp_une(half* %p, half* %q) #0 {
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP: vcmpe.f32
+; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-LIBCALL: movw{{ne|eq}}
@@ -597,7 +597,7 @@ define void @test_fma(half* %p, half* %q, half* %r) #0 {
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fabs:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bfc
+; CHECK-LIBCALL: bic
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fabs(half* %p) {
%a = load half, half* %p, align 2
@@ -643,10 +643,11 @@ define void @test_maxnum(half* %p, half* %q) #0 {
}
; CHECK-ALL-LABEL: test_minnan:
-; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vmov.f32 s0, #1.000000e+00
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
+; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vmin.f32
; CHECK-NOVFP: bl __aeabi_fcmpge
; CHECK-FP16: vcvtb.f16.f32
@@ -660,10 +661,11 @@ define void @test_minnan(half* %p) #0 {
}
; CHECK-ALL-LABEL: test_maxnan:
+; CHECK-FP16: vmov.f32 s0, #1.000000e+00
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvtb.f32.f16
-; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00
+; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vmax.f32
; CHECK-NOVFP: bl __aeabi_fcmple
; CHECK-FP16: vcvtb.f16.f32
@@ -685,7 +687,7 @@ define void @test_maxnan(half* %p) #0 {
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP-LIBCALL: vbsl
-; CHECK-NOVFP: bfc
+; CHECK-NOVFP: bic
; CHECK-NOVFP: and
; CHECK-NOVFP: orr
; CHECK-LIBCALL: bl __aeabi_f2h
@@ -845,21 +847,15 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
}
; CHECK-ALL-LABEL: test_extractelement:
+; CHECK-VFP: push {{{.*}}, lr}
; CHECK-VFP: sub sp, sp, #8
-; CHECK-VFP: ldrh
-; CHECK-VFP: ldrh
-; CHECK-VFP: orr
-; CHECK-VFP: str
-; CHECK-VFP: ldrh
-; CHECK-VFP: ldrh
-; CHECK-VFP: orr
-; CHECK-VFP: str
+; CHECK-VFP: ldrd
; CHECK-VFP: mov
; CHECK-VFP: orr
; CHECK-VFP: ldrh
; CHECK-VFP: strh
; CHECK-VFP: add sp, sp, #8
-; CHECK-VFP: bx lr
+; CHECK-VFP: pop {{{.*}}, pc}
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: strh
; CHECK-NOVFP: ldrh
diff --git a/test/CodeGen/ARM/fp16-v3.ll b/test/CodeGen/ARM/fp16-v3.ll
index e26455e61e7f0..a37f71d9ba881 100644
--- a/test/CodeGen/ARM/fp16-v3.ll
+++ b/test/CodeGen/ARM/fp16-v3.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv7a--none-eabi"
; CHECK-LABEL: test_vec3:
-; CHECK-DAG: vcvtb.f32.f16 [[SREG1:s[0-9]+]],
+; CHECK-DAG: vmov.f32 [[SREG1:s[0-9]+]], #1.200000e+01
; CHECK-DAG: vcvt.f32.s32 [[SREG2:s[0-9]+]],
; CHECK-DAG: vcvtb.f16.f32 [[SREG3:s[0-9]+]], [[SREG2]]
; CHECK-DAG: vcvtb.f32.f16 [[SREG4:s[0-9]+]], [[SREG3]]
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 45bb6d2f702d0..a828541094507 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -10,7 +10,7 @@ entry:
; CHECK-LABEL: t1:
; CHECK: vldr [[S0:s[0-9]+]],
; CHECK: vldr [[S1:s[0-9]+]],
-; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vcmp.f32 [[S1]], [[S0]]
; CHECK: vmrs APSR_nzcv, fpscr
; CHECK: beq
%0 = load float, float* %a
@@ -35,10 +35,10 @@ entry:
; CHECK-NOT: vldr
; CHECK: ldrd [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], [r0]
; CHECK-NOT: b LBB
-; CHECK: bfc [[REG2]], #31, #1
+; CHECK: bic [[REG2]], [[REG2]], #-2147483648
; CHECK: cmp [[REG1]], #0
; CHECK: cmpeq [[REG2]], #0
-; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vcmp.f32
; CHECK-NOT: vmrs
; CHECK: bne
%0 = load double, double* %a
@@ -61,7 +61,7 @@ entry:
; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
; CHECK: tst [[REG3]], [[REG4]]
-; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vcmp.f32
; CHECK-NOT: vmrs
; CHECK: bne
%0 = load float, float* %a
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index e3ffd45a396d8..67326e0001697 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -12,7 +12,7 @@ entry:
define i32 @f2(float %a) {
;CHECK-LABEL: f2:
-;CHECK: vcmpe.f32
+;CHECK: vcmp.f32
;CHECK: moveq
entry:
%tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1]
@@ -52,7 +52,7 @@ entry:
define i32 @f6(float %a) {
;CHECK-LABEL: f6:
-;CHECK: vcmpe.f32
+;CHECK: vcmp.f32
;CHECK: movne
entry:
%tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index c1696c9be1b7c..698c7506cc593 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -17,7 +17,7 @@ entry:
; CHECK-ARMv4: moveq r0, #42
; CHECK-ARMv7-LABEL: f7:
-; CHECK-ARMv7: vcmpe.f32
+; CHECK-ARMv7: vcmp.f32
; CHECK-ARMv7: vmrs APSR_nzcv, fpscr
; CHECK-ARMv7: movweq
; CHECK-ARMv7-NOT: vmrs
diff --git a/test/CodeGen/ARM/fpscr-intrinsics.ll b/test/CodeGen/ARM/fpscr-intrinsics.ll
new file mode 100644
index 0000000000000..64b97525febfe
--- /dev/null
+++ b/test/CodeGen/ARM/fpscr-intrinsics.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -mtriple=armv7-eabi -mcpu=cortex-a8 -mattr=+neon,+fp-armv8 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=armv7-eabi -mcpu=cortex-a8 -mattr=+neon,+fp-armv8 | FileCheck %s
+
+@a = common global double 0.000000e+00, align 8
+
+; Function Attrs: noinline nounwind uwtable
+define void @strtod() {
+entry:
+ ; CHECK: vmrs r{{[0-9]+}}, fpscr
+ %0 = call i32 @llvm.flt.rounds()
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store double 5.000000e-01, double* @a, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @fn1(i32* nocapture %p) local_unnamed_addr {
+entry:
+ ; CHECK: vmrs r{{[0-9]+}}, fpscr
+ %0 = tail call i32 @llvm.arm.get.fpscr()
+ store i32 %0, i32* %p, align 4
+ ; CHECK: vmsr fpscr, r{{[0-9]+}}
+ tail call void @llvm.arm.set.fpscr(i32 1)
+ ; CHECK: vmrs r{{[0-9]+}}, fpscr
+ %1 = tail call i32 @llvm.arm.get.fpscr()
+ %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 1
+ store i32 %1, i32* %arrayidx1, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @llvm.arm.get.fpscr()
+
+; Function Attrs: nounwind writeonly
+declare void @llvm.arm.set.fpscr(i32)
+
+; Function Attrs: nounwind
+declare i32 @llvm.flt.rounds()
diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll
index ef3e5a54a2dbe..797b147d5d016 100644
--- a/test/CodeGen/ARM/gpr-paired-spill.ll
+++ b/test/CodeGen/ARM/gpr-paired-spill.ll
@@ -16,22 +16,22 @@ define void @foo(i64* %addr) {
; an LDMIA was created with both a FrameIndex and an offset, which
; is not allowed.
-; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
-; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
-; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
-; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
; We also want to ensure the register scavenger is working (i.e. an
; offset from sp can be generated), so we need two spills.
-; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
-; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
-; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD-DAG: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
+; CHECK-WITHOUT-LDRD-DAG: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD-DAG: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; In principle LLVM may have to recalculate the offset. At the moment
; it reuses the original though.
-; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
-; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD-DAG: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD-DAG: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
store volatile i64 %val1, i64* %addr
store volatile i64 %val2, i64* %addr
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 5725a404c3201..c7e18d35dbee1 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -10,8 +10,6 @@ entry:
; CHECK: vpop {d8}
; CHECK-NOT: vpopne
; CHECK: pop {r7, pc}
-; CHECK: vpop {d8}
-; CHECK: pop {r7, pc}
br i1 undef, label %if.else, label %if.then
if.then: ; preds = %entry
diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
new file mode 100644
index 0000000000000..74117d3896bdc
--- /dev/null
+++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi | FileCheck %s -check-prefix=LE
+; RUN: llc < %s -mtriple=armeb-eabi | FileCheck %s -check-prefix=BE
+
+define void @i24_or(i24* %a) {
+; LE-LABEL: i24_or:
+; LE: @ BB#0:
+; LE-NEXT: ldrh r1, [r0]
+; LE-NEXT: orr r1, r1, #384
+; LE-NEXT: strh r1, [r0]
+; LE-NEXT: mov pc, lr
+;
+; BE-LABEL: i24_or:
+; BE: @ BB#0:
+; BE-NEXT: ldrh r1, [r0]
+; BE-NEXT: ldrb r2, [r0, #2]
+; BE-NEXT: orr r1, r2, r1, lsl #8
+; BE-NEXT: orr r1, r1, #384
+; BE-NEXT: strb r1, [r0, #2]
+; BE-NEXT: lsr r1, r1, #8
+; BE-NEXT: strh r1, [r0]
+; BE-NEXT: mov pc, lr
+ %aa = load i24, i24* %a, align 1
+ %b = or i24 %aa, 384
+ store i24 %b, i24* %a, align 1
+ ret void
+}
+
+define void @i24_and_or(i24* %a) {
+; LE-LABEL: i24_and_or:
+; LE: @ BB#0:
+; LE-NEXT: ldrh r1, [r0]
+; LE-NEXT: mov r2, #16256
+; LE-NEXT: orr r2, r2, #49152
+; LE-NEXT: orr r1, r1, #384
+; LE-NEXT: and r1, r1, r2
+; LE-NEXT: strh r1, [r0]
+; LE-NEXT: mov pc, lr
+;
+; BE-LABEL: i24_and_or:
+; BE: @ BB#0:
+; BE-NEXT: mov r1, #128
+; BE-NEXT: strb r1, [r0, #2]
+; BE-NEXT: ldrh r1, [r0]
+; BE-NEXT: orr r1, r1, #1
+; BE-NEXT: strh r1, [r0]
+; BE-NEXT: mov pc, lr
+ %b = load i24, i24* %a, align 1
+ %c = and i24 %b, -128
+ %d = or i24 %c, 384
+ store i24 %d, i24* %a, align 1
+ ret void
+}
+
+define void @i24_insert_bit(i24* %a, i1 zeroext %bit) {
+; LE-LABEL: i24_insert_bit:
+; LE: @ BB#0:
+; LE-NEXT: ldrh r2, [r0]
+; LE-NEXT: mov r3, #255
+; LE-NEXT: orr r3, r3, #57088
+; LE-NEXT: and r2, r2, r3
+; LE-NEXT: orr r1, r2, r1, lsl #13
+; LE-NEXT: strh r1, [r0]
+; LE-NEXT: mov pc, lr
+;
+; BE-LABEL: i24_insert_bit:
+; BE: @ BB#0:
+; BE-NEXT: ldrh r2, [r0]
+; BE-NEXT: mov r3, #57088
+; BE-NEXT: orr r3, r3, #16711680
+; BE-NEXT: and r2, r3, r2, lsl #8
+; BE-NEXT: orr r1, r2, r1, lsl #13
+; BE-NEXT: lsr r1, r1, #8
+; BE-NEXT: strh r1, [r0]
+; BE-NEXT: mov pc, lr
+ %extbit = zext i1 %bit to i24
+ %b = load i24, i24* %a, align 1
+ %extbit.shl = shl nuw nsw i24 %extbit, 13
+ %c = and i24 %b, -8193
+ %d = or i24 %c, %extbit.shl
+ store i24 %d, i24* %a, align 1
+ ret void
+}
+
+define void @i56_or(i56* %a) {
+; LE-LABEL: i56_or:
+; LE: @ BB#0:
+; LE-NEXT: ldr r1, [r0]
+; LE-NEXT: orr r1, r1, #384
+; LE-NEXT: str r1, [r0]
+; LE-NEXT: mov pc, lr
+;
+; BE-LABEL: i56_or:
+; BE: @ BB#0:
+; BE-NEXT: mov r1, r0
+; BE-NEXT: ldr r12, [r0]
+; BE-NEXT: ldrh r2, [r1, #4]!
+; BE-NEXT: ldrb r3, [r1, #2]
+; BE-NEXT: orr r2, r3, r2, lsl #8
+; BE-NEXT: orr r2, r2, r12, lsl #24
+; BE-NEXT: orr r2, r2, #384
+; BE-NEXT: lsr r3, r2, #8
+; BE-NEXT: strb r2, [r1, #2]
+; BE-NEXT: strh r3, [r1]
+; BE-NEXT: bic r1, r12, #255
+; BE-NEXT: orr r1, r1, r2, lsr #24
+; BE-NEXT: str r1, [r0]
+; BE-NEXT: mov pc, lr
+ %aa = load i56, i56* %a
+ %b = or i56 %aa, 384
+ store i56 %b, i56* %a
+ ret void
+}
+
+define void @i56_and_or(i56* %a) {
+; LE-LABEL: i56_and_or:
+; LE: @ BB#0:
+; LE-NEXT: ldr r1, [r0]
+; LE-NEXT: orr r1, r1, #384
+; LE-NEXT: bic r1, r1, #127
+; LE-NEXT: str r1, [r0]
+; LE-NEXT: mov pc, lr
+;
+; BE-LABEL: i56_and_or:
+; BE: @ BB#0:
+; BE-NEXT: mov r1, r0
+; BE-NEXT: mov r3, #128
+; BE-NEXT: ldrh r2, [r1, #4]!
+; BE-NEXT: strb r3, [r1, #2]
+; BE-NEXT: lsl r2, r2, #8
+; BE-NEXT: ldr r12, [r0]
+; BE-NEXT: orr r2, r2, r12, lsl #24
+; BE-NEXT: orr r2, r2, #384
+; BE-NEXT: lsr r3, r2, #8
+; BE-NEXT: strh r3, [r1]
+; BE-NEXT: bic r1, r12, #255
+; BE-NEXT: orr r1, r1, r2, lsr #24
+; BE-NEXT: str r1, [r0]
+; BE-NEXT: mov pc, lr
+
+ %b = load i56, i56* %a, align 1
+ %c = and i56 %b, -128
+ %d = or i56 %c, 384
+ store i56 %d, i56* %a, align 1
+ ret void
+}
+
+define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
+; LE-LABEL: i56_insert_bit:
+; LE: @ BB#0:
+; LE-NEXT: ldr r2, [r0]
+; LE-NEXT: bic r2, r2, #8192
+; LE-NEXT: orr r1, r2, r1, lsl #13
+; LE-NEXT: str r1, [r0]
+; LE-NEXT: mov pc, lr
+;
+; BE-LABEL: i56_insert_bit:
+; BE: @ BB#0:
+; BE-NEXT: .save {r11, lr}
+; BE-NEXT: push {r11, lr}
+; BE-NEXT: mov r2, r0
+; BE-NEXT: ldr lr, [r0]
+; BE-NEXT: ldrh r12, [r2, #4]!
+; BE-NEXT: ldrb r3, [r2, #2]
+; BE-NEXT: orr r12, r3, r12, lsl #8
+; BE-NEXT: orr r3, r12, lr, lsl #24
+; BE-NEXT: bic r3, r3, #8192
+; BE-NEXT: orr r1, r3, r1, lsl #13
+; BE-NEXT: lsr r3, r1, #8
+; BE-NEXT: strh r3, [r2]
+; BE-NEXT: bic r2, lr, #255
+; BE-NEXT: orr r1, r2, r1, lsr #24
+; BE-NEXT: str r1, [r0]
+; BE-NEXT: pop {r11, lr}
+; BE-NEXT: mov pc, lr
+ %extbit = zext i1 %bit to i56
+ %b = load i56, i56* %a, align 1
+ %extbit.shl = shl nuw nsw i56 %extbit, 13
+ %c = and i56 %b, -8193
+ %d = or i56 %c, %extbit.shl
+ store i56 %d, i56* %a, align 1
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index d15ef14b44932..90defad43a7d8 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -47,6 +47,7 @@ L3: ; preds = %L4, %bb2
br label %L2
L2: ; preds = %L3, %bb2
+; THUMB-LABEL: %L1.clone
; THUMB: muls
%res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1]
%phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/interval-update-remat.ll b/test/CodeGen/ARM/interval-update-remat.ll
index 6391d4c29604f..524e8a0aa491a 100644
--- a/test/CodeGen/ARM/interval-update-remat.ll
+++ b/test/CodeGen/ARM/interval-update-remat.ll
@@ -109,7 +109,7 @@ _ZN7MessageD1Ev.exit: ; preds = %if.then.i.i.i.i, %i
}
; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
declare %class.StaticSocketDataProvider.6.231.281.1306.2331* @_ZN24StaticSocketDataProviderC1EP13MockReadWritejS1_j(%class.StaticSocketDataProvider.6.231.281.1306.2331* returned, %struct.MockReadWrite.7.232.282.1307.2332*, i32, %struct.MockReadWrite.7.232.282.1307.2332*, i32) unnamed_addr
@@ -130,7 +130,7 @@ declare %class.Message.13.238.288.1313.2338* @_ZN7MessageC1Ev(%class.Message.13.
declare %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperD1Ev(%class.AssertHelper.10.235.285.1310.2335* returned) unnamed_addr
; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
declare void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIciiE13scoped_refptr15RequestPriorityN16ClientSocketPool13RespectLimitsERiT_11BoundNetLog(%class.ClientSocketHandle.14.239.289.1314.2339*, %class.Trans_NS___1_basic_string.18.243.293.1318.2343* dereferenceable(12), %class.scoped_refptr.19.244.294.1319.2344*, i32, i32, i32* dereferenceable(4), i32*, %class.BoundNetLog.20.245.295.1320.2345*)
diff --git a/test/CodeGen/ARM/intrinsics-coprocessor.ll b/test/CodeGen/ARM/intrinsics-coprocessor.ll
index 8fea49b39fb60..5352471238f9c 100644
--- a/test/CodeGen/ARM/intrinsics-coprocessor.ll
+++ b/test/CodeGen/ARM/intrinsics-coprocessor.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=thumb -mtriple=thumbv7-eabi -mcpu=cortex-a8 | FileCheck %s
define void @coproc(i8* %i) nounwind {
entry:
diff --git a/test/CodeGen/ARM/ldm-stm-i256.ll b/test/CodeGen/ARM/ldm-stm-i256.ll
new file mode 100644
index 0000000000000..7b4151dabf6dd
--- /dev/null
+++ b/test/CodeGen/ARM/ldm-stm-i256.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=armv7--eabi -verify-machineinstrs < %s | FileCheck %s
+
+; Check the way we schedule/merge a bunch of loads and stores.
+; Originally test/CodeGen/ARM/2011-07-07-ScheduleDAGCrash.ll ; now
+; being used as a test of optimizations related to ldm/stm.
+
+; FIXME: We could merge more loads/stores with regalloc hints.
+; FIXME: Fix scheduling so we don't have 16 live registers.
+
+define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
+entry:
+ %c = load i256, i256* %cc
+ %d = load i256, i256* %dd
+ %add = add nsw i256 %c, %d
+ store i256 %add, i256* %a, align 8
+ %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
+ %add6 = add nsw i256 %or, %d
+ store i256 %add6, i256* %b, align 8
+ ret void
+ ; CHECK-DAG: ldm r3
+ ; CHECK-DAG: ldm r2
+ ; CHECK-DAG: ldr {{.*}}, [r3, #20]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #16]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #28]
+ ; CHECK-DAG: ldr {{.*}}, [r3, #24]
+ ; CHECK-DAG: ldr {{.*}}, [r2, #20]
+ ; CHECK-DAG: ldr {{.*}}, [r2, #16]
+ ; CHECK-DAG: ldr {{.*}}, [r2, #28]
+ ; CHECK-DAG: ldr {{.*}}, [r2, #24]
+ ; CHECK-DAG: stmib r0
+ ; CHECK-DAG: str {{.*}}, [r0]
+ ; CHECK-DAG: str {{.*}}, [r0, #24]
+ ; CHECK-DAG: str {{.*}}, [r0, #28]
+ ; CHECK-DAG: str {{.*}}, [r1]
+ ; CHECK-DAG: stmib r1
+ ; CHECK-DAG: str {{.*}}, [r1, #24]
+ ; CHECK-DAG: str {{.*}}, [r1, #28]
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 6a9e63f649c93..6981cfcb08550 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -80,7 +80,7 @@ return: ; preds = %bb, %entry
; CHECK-LABEL: Func1:
define void @Func1() nounwind ssp "no-frame-pointer-elim"="true" {
-entry:
+entry:
; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}}
; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4]
@@ -88,12 +88,12 @@ entry:
; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
; CONSERVATIVE-NOT: ldrd
%orig_blocks = alloca [256 x i16], align 2
- %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start(i64 512, i8* %0) nounwind
+ %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind
%tmp1 = load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 1), align 4
%tmp2 = load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 2), align 4
%add = add nsw i32 %tmp2, %tmp1
store i32 %add, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 0), align 4
- call void @llvm.lifetime.end(i64 512, i8* %0) nounwind
+ call void @llvm.lifetime.end.p0i8(i64 512, i8* %0) nounwind
ret void
}
@@ -189,5 +189,23 @@ define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) "no-frame-pointer-e
ret i32* %p1
}
-declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+; CHECK-LABEL: ldrd_strd_aa:
+; NORMAL: ldrd [[TMP1:r[0-9]]], [[TMP2:r[0-9]]],
+; NORMAL: strd [[TMP1]], [[TMP2]],
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: strd
+; CHECK: bx lr
+
+define void @ldrd_strd_aa(i32* noalias nocapture %x, i32* noalias nocapture readonly %y) {
+entry:
+ %0 = load i32, i32* %y, align 4
+ store i32 %0, i32* %x, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %y, i32 1
+ %1 = load i32, i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 1
+ store i32 %1, i32* %arrayidx3, align 4
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/ARM/load-combine-big-endian.ll b/test/CodeGen/ARM/load-combine-big-endian.ll
new file mode 100644
index 0000000000000..8d8a0136cf962
--- /dev/null
+++ b/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -0,0 +1,779 @@
+; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
+
+; i8* p; // p is 4 byte aligned
+; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
+define i32 @load_i32_by_i8_big_endian(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_big_endian:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_big_endian:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 4
+ %tmp2 = zext i8 %tmp1 to i32
+ %tmp3 = shl nuw nsw i32 %tmp2, 24
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 8
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = or i32 %tmp13, %tmp16
+ ret i32 %tmp17
+}
+
+; i8* p; // p is 4 byte aligned
+; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
+define i32 @load_i32_by_i8_bswap(i32* %arg) {
+; BSWAP is not supported by 32 bit target
+; CHECK-LABEL: load_i32_by_i8_bswap:
+; CHECK: ldr r0, [r0]
+; CHECK: and
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp, align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p is 4 byte aligned
+; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
+define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 4
+ %tmp2 = zext i8 %tmp1 to i16
+ %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp4 = load i8, i8* %tmp3, align 1
+ %tmp5 = zext i8 %tmp4 to i16
+ %tmp6 = shl nuw nsw i16 %tmp2, 8
+ %tmp7 = or i16 %tmp6, %tmp5
+ %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp9 = load i8, i8* %tmp8, align 1
+ %tmp10 = zext i8 %tmp9 to i16
+ %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp12 = load i8, i8* %tmp11, align 1
+ %tmp13 = zext i8 %tmp12 to i16
+ %tmp14 = shl nuw nsw i16 %tmp10, 8
+ %tmp15 = or i16 %tmp14, %tmp13
+ %tmp16 = zext i16 %tmp7 to i32
+ %tmp17 = zext i16 %tmp15 to i32
+ %tmp18 = shl nuw nsw i32 %tmp16, 16
+ %tmp19 = or i32 %tmp18, %tmp17
+ ret i32 %tmp19
+}
+
+; i16* p; // p is 4 byte aligned
+; ((i32) p[0] << 16) | (i32) p[1]
+define i32 @load_i32_by_i16(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i16:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i16:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i16*
+ %tmp1 = load i16, i16* %tmp, align 4
+ %tmp2 = zext i16 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
+ %tmp4 = load i16, i16* %tmp3, align 1
+ %tmp5 = zext i16 %tmp4 to i32
+ %tmp6 = shl nuw nsw i32 %tmp2, 16
+ %tmp7 = or i32 %tmp6, %tmp5
+ ret i32 %tmp7
+}
+
+; i16* p_16; // p_16 is 4 byte aligned
+; i8* p_8 = (i8*) p_16;
+; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
+define i32 @load_i32_by_i16_i8(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i16_i8:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i16_i8:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i16*
+ %tmp1 = bitcast i32* %arg to i8*
+ %tmp2 = load i16, i16* %tmp, align 4
+ %tmp3 = zext i16 %tmp2 to i32
+ %tmp4 = shl nuw nsw i32 %tmp3, 16
+ %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2
+ %tmp6 = load i8, i8* %tmp5, align 1
+ %tmp7 = zext i8 %tmp6 to i32
+ %tmp8 = shl nuw nsw i32 %tmp7, 8
+ %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = or i32 %tmp8, %tmp11
+ %tmp13 = or i32 %tmp12, %tmp4
+ ret i32 %tmp13
+}
+
+; i8* p; // p is 8 byte aligned
+; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
+define i64 @load_i64_by_i8_bswap(i64* %arg) {
+; CHECK-LABEL: load_i64_by_i8_bswap:
+; CHECK: ldr{{.*}}r0
+; CHECK: ldr{{.*}}r0
+; CHECK: and
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
+; CHECK-ARMv6: ldrd r2, r3, [r0]
+; CHECK-ARMv6: rev r0, r3
+; CHECK-ARMv6: rev r1, r2
+; CHECK-ARMv6: bx lr
+ %tmp = bitcast i64* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 8
+ %tmp2 = zext i8 %tmp1 to i64
+ %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
+ %tmp4 = load i8, i8* %tmp3, align 1
+ %tmp5 = zext i8 %tmp4 to i64
+ %tmp6 = shl nuw nsw i64 %tmp5, 8
+ %tmp7 = or i64 %tmp6, %tmp2
+ %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
+ %tmp9 = load i8, i8* %tmp8, align 1
+ %tmp10 = zext i8 %tmp9 to i64
+ %tmp11 = shl nuw nsw i64 %tmp10, 16
+ %tmp12 = or i64 %tmp7, %tmp11
+ %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
+ %tmp14 = load i8, i8* %tmp13, align 1
+ %tmp15 = zext i8 %tmp14 to i64
+ %tmp16 = shl nuw nsw i64 %tmp15, 24
+ %tmp17 = or i64 %tmp12, %tmp16
+ %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
+ %tmp19 = load i8, i8* %tmp18, align 1
+ %tmp20 = zext i8 %tmp19 to i64
+ %tmp21 = shl nuw nsw i64 %tmp20, 32
+ %tmp22 = or i64 %tmp17, %tmp21
+ %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
+ %tmp24 = load i8, i8* %tmp23, align 1
+ %tmp25 = zext i8 %tmp24 to i64
+ %tmp26 = shl nuw nsw i64 %tmp25, 40
+ %tmp27 = or i64 %tmp22, %tmp26
+ %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
+ %tmp29 = load i8, i8* %tmp28, align 1
+ %tmp30 = zext i8 %tmp29 to i64
+ %tmp31 = shl nuw nsw i64 %tmp30, 48
+ %tmp32 = or i64 %tmp27, %tmp31
+ %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
+ %tmp34 = load i8, i8* %tmp33, align 1
+ %tmp35 = zext i8 %tmp34 to i64
+ %tmp36 = shl nuw i64 %tmp35, 56
+ %tmp37 = or i64 %tmp32, %tmp36
+ ret i64 %tmp37
+}
+
+; i8* p; // p is 8 byte aligned
+; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
+define i64 @load_i64_by_i8(i64* %arg) {
+; CHECK-LABEL: load_i64_by_i8:
+; CHECK: ldr r2, [r0]
+; CHECK: ldr r1, [r0, #4]
+; CHECK: mov r0, r2
+; CHECK: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i64_by_i8:
+; CHECK-ARMv6: ldrd r0, r1, [r0]
+; CHECK-ARMv6: bx lr
+ %tmp = bitcast i64* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 8
+ %tmp2 = zext i8 %tmp1 to i64
+ %tmp3 = shl nuw i64 %tmp2, 56
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i64
+ %tmp7 = shl nuw nsw i64 %tmp6, 48
+ %tmp8 = or i64 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i64
+ %tmp12 = shl nuw nsw i64 %tmp11, 40
+ %tmp13 = or i64 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i64
+ %tmp17 = shl nuw nsw i64 %tmp16, 32
+ %tmp18 = or i64 %tmp13, %tmp17
+ %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
+ %tmp20 = load i8, i8* %tmp19, align 1
+ %tmp21 = zext i8 %tmp20 to i64
+ %tmp22 = shl nuw nsw i64 %tmp21, 24
+ %tmp23 = or i64 %tmp18, %tmp22
+ %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
+ %tmp25 = load i8, i8* %tmp24, align 1
+ %tmp26 = zext i8 %tmp25 to i64
+ %tmp27 = shl nuw nsw i64 %tmp26, 16
+ %tmp28 = or i64 %tmp23, %tmp27
+ %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
+ %tmp30 = load i8, i8* %tmp29, align 1
+ %tmp31 = zext i8 %tmp30 to i64
+ %tmp32 = shl nuw nsw i64 %tmp31, 8
+ %tmp33 = or i64 %tmp28, %tmp32
+ %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
+ %tmp35 = load i8, i8* %tmp34, align 1
+ %tmp36 = zext i8 %tmp35 to i64
+ %tmp37 = or i64 %tmp33, %tmp36
+ ret i64 %tmp37
+}
+
+; i8* p; // p[1] is 4 byte aligned
+; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK: ldr r0, [r0, #1]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK-ARMv6: ldr r0, [r0, #1]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: ldr r0, [r0, #-4]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
+; CHECK-ARMv6: ldr r0, [r0, #-4]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p[1] is 4 byte aligned
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: ldr r0, [r0, #1]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK-ARMv6: ldr r0, [r0, #1]
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: ldr r0, [r0, #-4]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK-ARMv6: ldr r0, [r0, #-4]
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+declare i16 @llvm.bswap.i16(i16)
+
+; i16* p; // p is 4 byte aligned
+; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
+define i32 @load_i32_by_bswap_i16(i32* %arg) {
+; CHECK-LABEL: load_i32_by_bswap_i16:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i16*
+ %tmp1 = load i16, i16* %tmp, align 4
+ %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
+ %tmp2 = zext i16 %tmp11 to i32
+ %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
+ %tmp4 = load i16, i16* %tmp3, align 1
+ %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
+ %tmp5 = zext i16 %tmp41 to i32
+ %tmp6 = shl nuw nsw i32 %tmp5, 16
+ %tmp7 = or i32 %tmp6, %tmp2
+ ret i32 %tmp7
+}
+
+; i16* p; // p is 4 byte aligned
+; (i32) p[1] | (sext(p[0] << 16) to i32)
+define i32 @load_i32_by_sext_i16(i32* %arg) {
+; CHECK-LABEL: load_i32_by_sext_i16:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i16*
+ %tmp1 = load i16, i16* %tmp, align 4
+ %tmp2 = sext i16 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
+ %tmp4 = load i16, i16* %tmp3, align 1
+ %tmp5 = zext i16 %tmp4 to i32
+ %tmp6 = shl nuw nsw i32 %tmp2, 16
+ %tmp7 = or i32 %tmp6, %tmp5
+ ret i32 %tmp7
+}
+
+; i8* arg; i32 i;
+; p = arg + 12;
+; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
+define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
+; CHECK-LABEL: load_i32_by_i8_base_offset_index:
+; CHECK: add r0, r0, r1
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: ldr r0, [r0, #12]
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
+; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6-NEXT: ldr r0, [r0, #12]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = add nuw nsw i32 %i, 3
+ %tmp2 = add nuw nsw i32 %i, 2
+ %tmp3 = add nuw nsw i32 %i, 1
+ %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
+ %tmp5 = zext i32 %i to i64
+ %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
+ %tmp7 = load i8, i8* %tmp6, align 4
+ %tmp8 = zext i8 %tmp7 to i32
+ %tmp9 = zext i32 %tmp3 to i64
+ %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
+ %tmp11 = load i8, i8* %tmp10, align 1
+ %tmp12 = zext i8 %tmp11 to i32
+ %tmp13 = shl nuw nsw i32 %tmp12, 8
+ %tmp14 = or i32 %tmp13, %tmp8
+ %tmp15 = zext i32 %tmp2 to i64
+ %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
+ %tmp17 = load i8, i8* %tmp16, align 1
+ %tmp18 = zext i8 %tmp17 to i32
+ %tmp19 = shl nuw nsw i32 %tmp18, 16
+ %tmp20 = or i32 %tmp14, %tmp19
+ %tmp21 = zext i32 %tmp to i64
+ %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
+ %tmp23 = load i8, i8* %tmp22, align 1
+ %tmp24 = zext i8 %tmp23 to i32
+ %tmp25 = shl nuw i32 %tmp24, 24
+ %tmp26 = or i32 %tmp20, %tmp25
+ ret i32 %tmp26
+}
+
+; i8* arg; i32 i;
+; p = arg + 12;
+; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
+define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
+; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
+; CHECK: add r0, r0, r1
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: ldr r0, [r0, #13]
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
+; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = add nuw nsw i32 %i, 4
+ %tmp2 = add nuw nsw i32 %i, 3
+ %tmp3 = add nuw nsw i32 %i, 2
+ %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
+ %tmp5 = add nuw nsw i32 %i, 1
+ %tmp27 = zext i32 %tmp5 to i64
+ %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
+ %tmp29 = load i8, i8* %tmp28, align 4
+ %tmp30 = zext i8 %tmp29 to i32
+ %tmp31 = zext i32 %tmp3 to i64
+ %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
+ %tmp33 = load i8, i8* %tmp32, align 1
+ %tmp34 = zext i8 %tmp33 to i32
+ %tmp35 = shl nuw nsw i32 %tmp34, 8
+ %tmp36 = or i32 %tmp35, %tmp30
+ %tmp37 = zext i32 %tmp2 to i64
+ %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
+ %tmp39 = load i8, i8* %tmp38, align 1
+ %tmp40 = zext i8 %tmp39 to i32
+ %tmp41 = shl nuw nsw i32 %tmp40, 16
+ %tmp42 = or i32 %tmp36, %tmp41
+ %tmp43 = zext i32 %tmp to i64
+ %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
+ %tmp45 = load i8, i8* %tmp44, align 1
+ %tmp46 = zext i8 %tmp45 to i32
+ %tmp47 = shl nuw i32 %tmp46, 24
+ %tmp48 = or i32 %tmp42, %tmp47
+ ret i32 %tmp48
+}
+
+; i8* p; // p is 2 byte aligned
+; (i32) p[0] | ((i32) p[1] << 8)
+define i32 @zext_load_i32_by_i8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[0] << 8) | ((i32) p[1] << 16)
+define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r0, r0, #16
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r0, r0, #16
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 8
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[0] << 16) | ((i32) p[1] << 24)
+define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r1, lsl #16
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r0, r0, #24
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 16
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 24
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; (i32) p[1] | ((i32) p[0] << 8)
+define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[1] << 8) | ((i32) p[0] << 16)
+define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r1, r1, #16
+; CHECK-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r1, r1, #16
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 8
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[1] << 16) | ((i32) p[0] << 24)
+define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r1, r1, #24
+; CHECK-NEXT: orr r0, r1, r0, lsl #16
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r1, r1, #24
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 16
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 24
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p;
+; i16* p1.i16 = (i16*) p;
+; (p1.i16[0] << 8) | ((i16) p[2])
+;
+; This is essentialy a i16 load from p[1], but we don't fold the pattern now
+; because in the original DAG we don't have p[1] address available
+define i16 @load_i16_from_nonzero_offset(i8* %p) {
+; CHECK-LABEL: load_i16_from_nonzero_offset:
+; CHECK: ldrh r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #2]
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
+; CHECK-ARMv6: ldrh r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #2]
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %p1.i16 = bitcast i8* %p to i16*
+ %p2.i8 = getelementptr i8, i8* %p, i64 2
+ %v1 = load i16, i16* %p1.i16
+ %v2.i8 = load i8, i8* %p2.i8
+ %v2 = zext i8 %v2.i8 to i16
+ %v1.shl = shl i16 %v1, 8
+ %res = or i16 %v1.shl, %v2
+ ret i16 %res
+}
diff --git a/test/CodeGen/ARM/load-combine.ll b/test/CodeGen/ARM/load-combine.ll
new file mode 100644
index 0000000000000..720bc7b88b32f
--- /dev/null
+++ b/test/CodeGen/ARM/load-combine.ll
@@ -0,0 +1,692 @@
+; RUN: llc < %s -mtriple=arm-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
+
+; i8* p; // p is 1 byte aligned
+; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
+define i32 @load_i32_by_i8_unaligned(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_unaligned:
+; CHECK: ldrb{{.*}}r0
+; CHECK: ldrb{{.*}}r0
+; CHECK: ldrb{{.*}}r0
+; CHECK: ldrb{{.*}}r0
+; CHECK: orr
+; CHECK: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_unaligned:
+; CHECK-ARMv6: ldrb{{.*}}r0
+; CHECK-ARMv6: ldrb{{.*}}r0
+; CHECK-ARMv6: ldrb{{.*}}r0
+; CHECK-ARMv6: ldrb{{.*}}r0
+; CHECK-ARMv6: orr
+; CHECK-ARMv6: bx lr
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p is 4 byte aligned
+; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
+define i32 @load_i32_by_i8_aligned(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_aligned:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_aligned:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp, align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p is 4 byte aligned
+; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
+define i32 @load_i32_by_i8_bswap(i32* %arg) {
+; BSWAP is not supported by 32 bit target
+; CHECK-LABEL: load_i32_by_i8_bswap:
+; CHECK: ldr r0, [r0]
+; CHECK: and
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 4
+ %tmp2 = zext i8 %tmp1 to i32
+ %tmp3 = shl nuw nsw i32 %tmp2, 24
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 8
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = or i32 %tmp13, %tmp16
+ ret i32 %tmp17
+}
+
+; i8* p; // p is 8 byte aligned
+; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
+define i64 @load_i64_by_i8(i64* %arg) {
+; CHECK-LABEL: load_i64_by_i8:
+; CHECK: ldr r2, [r0]
+; CHECK-NEXT: ldr r1, [r0, #4]
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i64_by_i8:
+; CHECK-ARMv6: ldrd r0, r1, [r0]
+; CHECK-ARMv6: bx lr
+ %tmp = bitcast i64* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 8
+ %tmp2 = zext i8 %tmp1 to i64
+ %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
+ %tmp4 = load i8, i8* %tmp3, align 1
+ %tmp5 = zext i8 %tmp4 to i64
+ %tmp6 = shl nuw nsw i64 %tmp5, 8
+ %tmp7 = or i64 %tmp6, %tmp2
+ %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
+ %tmp9 = load i8, i8* %tmp8, align 1
+ %tmp10 = zext i8 %tmp9 to i64
+ %tmp11 = shl nuw nsw i64 %tmp10, 16
+ %tmp12 = or i64 %tmp7, %tmp11
+ %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
+ %tmp14 = load i8, i8* %tmp13, align 1
+ %tmp15 = zext i8 %tmp14 to i64
+ %tmp16 = shl nuw nsw i64 %tmp15, 24
+ %tmp17 = or i64 %tmp12, %tmp16
+ %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
+ %tmp19 = load i8, i8* %tmp18, align 1
+ %tmp20 = zext i8 %tmp19 to i64
+ %tmp21 = shl nuw nsw i64 %tmp20, 32
+ %tmp22 = or i64 %tmp17, %tmp21
+ %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
+ %tmp24 = load i8, i8* %tmp23, align 1
+ %tmp25 = zext i8 %tmp24 to i64
+ %tmp26 = shl nuw nsw i64 %tmp25, 40
+ %tmp27 = or i64 %tmp22, %tmp26
+ %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
+ %tmp29 = load i8, i8* %tmp28, align 1
+ %tmp30 = zext i8 %tmp29 to i64
+ %tmp31 = shl nuw nsw i64 %tmp30, 48
+ %tmp32 = or i64 %tmp27, %tmp31
+ %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
+ %tmp34 = load i8, i8* %tmp33, align 1
+ %tmp35 = zext i8 %tmp34 to i64
+ %tmp36 = shl nuw i64 %tmp35, 56
+ %tmp37 = or i64 %tmp32, %tmp36
+ ret i64 %tmp37
+}
+
+; i8* p; // p is 8 byte aligned
+; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
+define i64 @load_i64_by_i8_bswap(i64* %arg) {
+; CHECK-LABEL: load_i64_by_i8_bswap:
+; CHECK: ldr{{.*}}r0
+; CHECK: ldr{{.*}}r0
+; CHECK: and
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: and
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK-NEXT: orr
+; CHECK: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
+; CHECK-ARMv6: ldrd r2, r3, [r0]
+; CHECK-ARMv6: rev r0, r3
+; CHECK-ARMv6: rev r1, r2
+; CHECK-ARMv6: bx lr
+ %tmp = bitcast i64* %arg to i8*
+ %tmp1 = load i8, i8* %tmp, align 8
+ %tmp2 = zext i8 %tmp1 to i64
+ %tmp3 = shl nuw i64 %tmp2, 56
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i64
+ %tmp7 = shl nuw nsw i64 %tmp6, 48
+ %tmp8 = or i64 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i64
+ %tmp12 = shl nuw nsw i64 %tmp11, 40
+ %tmp13 = or i64 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i64
+ %tmp17 = shl nuw nsw i64 %tmp16, 32
+ %tmp18 = or i64 %tmp13, %tmp17
+ %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
+ %tmp20 = load i8, i8* %tmp19, align 1
+ %tmp21 = zext i8 %tmp20 to i64
+ %tmp22 = shl nuw nsw i64 %tmp21, 24
+ %tmp23 = or i64 %tmp18, %tmp22
+ %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
+ %tmp25 = load i8, i8* %tmp24, align 1
+ %tmp26 = zext i8 %tmp25 to i64
+ %tmp27 = shl nuw nsw i64 %tmp26, 16
+ %tmp28 = or i64 %tmp23, %tmp27
+ %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
+ %tmp30 = load i8, i8* %tmp29, align 1
+ %tmp31 = zext i8 %tmp30 to i64
+ %tmp32 = shl nuw nsw i64 %tmp31, 8
+ %tmp33 = or i64 %tmp28, %tmp32
+ %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
+ %tmp35 = load i8, i8* %tmp34, align 1
+ %tmp36 = zext i8 %tmp35 to i64
+ %tmp37 = or i64 %tmp33, %tmp36
+ ret i64 %tmp37
+}
+
+; i8* p; // p[1] is 4 byte aligned
+; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
+define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK: ldr r0, [r0, #1]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
+; CHECK-ARMv6: ldr r0, [r0, #1]
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned
+; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
+define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_neg_offset:
+; CHECK: ldr r0, [r0, #-4]
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
+; CHECK-ARMv6: ldr r0, [r0, #-4]
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp2 = load i8, i8* %tmp1, align 4
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp15 = load i8, i8* %tmp14, align 1
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p[1] is 4 byte aligned
+; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
+define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK: ldr r0, [r0, #1]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
+; CHECK-ARMv6: ldr r0, [r0, #1]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp15 = load i8, i8* %tmp14, align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+; i8* p; // p[-4] is 4 byte aligned
+; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
+define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
+; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK: ldr r0, [r0, #-4]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
+; CHECK-ARMv6: ldr r0, [r0, #-4]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
+ %tmp10 = load i8, i8* %tmp9, align 1
+ %tmp11 = zext i8 %tmp10 to i32
+ %tmp12 = shl nuw nsw i32 %tmp11, 16
+ %tmp13 = or i32 %tmp8, %tmp12
+ %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
+ %tmp15 = load i8, i8* %tmp14, align 4
+ %tmp16 = zext i8 %tmp15 to i32
+ %tmp17 = shl nuw nsw i32 %tmp16, 24
+ %tmp18 = or i32 %tmp13, %tmp17
+ ret i32 %tmp18
+}
+
+declare i16 @llvm.bswap.i16(i16)
+
+; i16* p; // p is 4 byte aligned
+; (i32) bswap(p[1]) | (i32) bswap(p[0] << 16)
+define i32 @load_i32_by_bswap_i16(i32* %arg) {
+; CHECK-LABEL: load_i32_by_bswap_i16:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
+; CHECK-NEXT: mov pc, lr
+
+; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i16*
+ %tmp1 = load i16, i16* %tmp, align 4
+ %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
+ %tmp2 = zext i16 %tmp11 to i32
+ %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
+ %tmp4 = load i16, i16* %tmp3, align 1
+ %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
+ %tmp5 = zext i16 %tmp41 to i32
+ %tmp6 = shl nuw nsw i32 %tmp2, 16
+ %tmp7 = or i32 %tmp6, %tmp5
+ ret i32 %tmp7
+}
+
+; i16* p;
+; (i32) p[0] | (sext(p[1] << 16) to i32)
+define i32 @load_i32_by_sext_i16(i32* %arg) {
+; CHECK-LABEL: load_i32_by_sext_i16:
+; CHECK: ldr r0, [r0]
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
+; CHECK-ARMv6: ldr r0, [r0]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = bitcast i32* %arg to i16*
+ %tmp1 = load i16, i16* %tmp, align 4
+ %tmp2 = zext i16 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
+ %tmp4 = load i16, i16* %tmp3, align 1
+ %tmp5 = sext i16 %tmp4 to i32
+ %tmp6 = shl nuw nsw i32 %tmp5, 16
+ %tmp7 = or i32 %tmp6, %tmp2
+ ret i32 %tmp7
+}
+
+; i8* arg; i32 i;
+; p = arg + 12;
+; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
+define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
+; CHECK-LABEL: load_i32_by_i8_base_offset_index:
+; CHECK: add r0, r0, r1
+; CHECK-NEXT: ldr r0, [r0, #12]
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
+; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6-NEXT: ldr r0, [r0, #12]
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = add nuw nsw i32 %i, 3
+ %tmp2 = add nuw nsw i32 %i, 2
+ %tmp3 = add nuw nsw i32 %i, 1
+ %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
+ %tmp5 = zext i32 %i to i64
+ %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
+ %tmp7 = load i8, i8* %tmp6, align 4
+ %tmp8 = zext i8 %tmp7 to i32
+ %tmp9 = zext i32 %tmp3 to i64
+ %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
+ %tmp11 = load i8, i8* %tmp10, align 1
+ %tmp12 = zext i8 %tmp11 to i32
+ %tmp13 = shl nuw nsw i32 %tmp12, 8
+ %tmp14 = or i32 %tmp13, %tmp8
+ %tmp15 = zext i32 %tmp2 to i64
+ %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
+ %tmp17 = load i8, i8* %tmp16, align 1
+ %tmp18 = zext i8 %tmp17 to i32
+ %tmp19 = shl nuw nsw i32 %tmp18, 16
+ %tmp20 = or i32 %tmp14, %tmp19
+ %tmp21 = zext i32 %tmp to i64
+ %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
+ %tmp23 = load i8, i8* %tmp22, align 1
+ %tmp24 = zext i8 %tmp23 to i32
+ %tmp25 = shl nuw i32 %tmp24, 24
+ %tmp26 = or i32 %tmp20, %tmp25
+ ret i32 %tmp26
+}
+
+; i8* arg; i32 i;
+; p = arg + 12;
+; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
+define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
+; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
+; CHECK: add r0, r0, r1
+; CHECK-NEXT: ldr r0, [r0, #13]
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
+; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
+; CHECK-ARMv6-NEXT: bx lr
+ %tmp = add nuw nsw i32 %i, 4
+ %tmp2 = add nuw nsw i32 %i, 3
+ %tmp3 = add nuw nsw i32 %i, 2
+ %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
+ %tmp5 = add nuw nsw i32 %i, 1
+ %tmp27 = zext i32 %tmp5 to i64
+ %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
+ %tmp29 = load i8, i8* %tmp28, align 4
+ %tmp30 = zext i8 %tmp29 to i32
+ %tmp31 = zext i32 %tmp3 to i64
+ %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
+ %tmp33 = load i8, i8* %tmp32, align 1
+ %tmp34 = zext i8 %tmp33 to i32
+ %tmp35 = shl nuw nsw i32 %tmp34, 8
+ %tmp36 = or i32 %tmp35, %tmp30
+ %tmp37 = zext i32 %tmp2 to i64
+ %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
+ %tmp39 = load i8, i8* %tmp38, align 1
+ %tmp40 = zext i8 %tmp39 to i32
+ %tmp41 = shl nuw nsw i32 %tmp40, 16
+ %tmp42 = or i32 %tmp36, %tmp41
+ %tmp43 = zext i32 %tmp to i64
+ %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
+ %tmp45 = load i8, i8* %tmp44, align 1
+ %tmp46 = zext i8 %tmp45 to i32
+ %tmp47 = shl nuw i32 %tmp46, 24
+ %tmp48 = or i32 %tmp42, %tmp47
+ ret i32 %tmp48
+}
+
+; i8* p; // p is 2 byte aligned
+; (i32) p[0] | ((i32) p[1] << 8)
+define i32 @zext_load_i32_by_i8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[0] << 8) | ((i32) p[1] << 16)
+define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r0, r0, #16
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r0, r0, #16
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 8
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[0] << 16) | ((i32) p[1] << 24)
+define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r1, lsl #16
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r0, r0, #24
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 16
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 24
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; (i32) p[1] | ((i32) p[0] << 8)
+define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[1] << 8) | ((i32) p[0] << 16)
+define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r1, r1, #16
+; CHECK-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r1, r1, #16
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 8
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned
+; ((i32) p[1] << 16) | ((i32) p[0] << 24)
+define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
+; CHECK: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: lsl r1, r1, #24
+; CHECK-NEXT: orr r0, r1, r0, lsl #16
+; CHECK-NEXT: mov pc, lr
+;
+; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
+; CHECK-ARMv6: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: lsl r1, r1, #24
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16
+; CHECK-ARMv6-NEXT: bx lr
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 16
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 24
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll
index 80cb5096c03c5..9ecda8b06cbf2 100644
--- a/test/CodeGen/ARM/longMAC.ll
+++ b/test/CodeGen/ARM/longMAC.ll
@@ -1,14 +1,15 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE
-; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7-LE
+; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-V7-LE
; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
-; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE
-; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB
-; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB2
-; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB
-; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB-BE
-; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6M-THUMB
-; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7M-THUMB
-; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7EM-THUMB
+; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V7-BE
+; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6-THUMB
+; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2-DSP
+; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2-DSP
+; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V7-THUMB-BE
+; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6M-THUMB
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V7M-THUMB
+; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2-DSP
+; RUN: llc -mtriple=armv5te-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V5TE
; Check generated signed and unsigned multiply accumulate long.
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
@@ -20,12 +21,9 @@ define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
-;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V6-THUMB2: mov r0, [[RDLO]]
-;CHECK-V6-THUMB2: mov r1, [[RDHI]]
-;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V7-THUMB: mov r0, [[RDLO]]
-;CHECK-V7-THUMB: mov r1, [[RDHI]]
+;CHECK-T2-DSP: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
+;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
+;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
@@ -44,12 +42,9 @@ define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
-;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V6-THUMB2: mov r0, [[RDLO]]
-;CHECK-V6-THUMB2: mov r1, [[RDHI]]
-;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V7-THUMB: mov r0, [[RDLO]]
-;CHECK-V7-THUMB: mov r1, [[RDHI]]
+;CHECK-T2-DSP: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
+;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
+;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
@@ -78,8 +73,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
-;CHECK-V6-THUMB2: umlal
-;CHECK-V7-THUMB: umlal
+;CHECK-T2-DSP: umlal
;CHECK-V6-THUMB-NOT: umlal
%conv = zext i32 %b to i64
%conv1 = zext i32 %a to i64
@@ -92,8 +86,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
;CHECK-LABEL: MACLongTest4:
;CHECK-V6-THUMB-NOT: smlal
-;CHECK-V6-THUMB2: smlal
-;CHECK-V7-THUMB: smlal
+;CHECK-T2-DSP: smlal
;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0
;CHECK-LE: mov r0, [[RDLO]]
@@ -114,14 +107,12 @@ define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
;CHECK-LABEL: MACLongTest6:
;CHECK-V6-THUMB-NOT: smull
;CHECK-V6-THUMB-NOT: smlal
-;CHECK: smull r12, lr, r1, r0
-;CHECK: smlal r12, lr, r3, r2
+;CHECK-LE: smull r12, lr, r1, r0
+;CHECK-LE: smlal r12, lr, r3, r2
;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
-;CHECK-V7-THUMB: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
-;CHECK-V7-THUMB: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
-;CHECK-V6-THUMB2: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
-;CHECK-V6-THUMB2: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
+;CHECK-T2-DSP: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
+;CHECK-T2-DSP: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
%conv = sext i32 %a to i64
%conv1 = sext i32 %b to i64
%mul = mul nsw i64 %conv1, %conv
@@ -172,18 +163,12 @@ define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-BE: mov r0, [[RDHI]]
;CHECK-V7-BE: mov r1, [[RDLO]]
-;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V6-THUMB2: mov r0, [[RDLO]]
-;CHECK-V6-THUMB2: mov r1, [[RDHI]]
-;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V7-THUMB: mov r0, [[RDLO]]
-;CHECK-V7-THUMB: mov r1, [[RDHI]]
+;CHECK-T2-DSP: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
+;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
+;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
-;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
-;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
;CHECK-NOT:umaal
;CHECK-V6-THUMB-NOT: umaal
;CHECK-V6M-THUMB-NOT: umaal
@@ -206,18 +191,12 @@ define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-BE: mov r0, [[RDHI]]
;CHECK-V7-BE: mov r1, [[RDLO]]
-;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V6-THUMB2: mov r0, [[RDLO]]
-;CHECK-V6-THUMB2: mov r1, [[RDHI]]
-;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V7-THUMB: mov r0, [[RDLO]]
-;CHECK-V7-THUMB: mov r1, [[RDHI]]
+;CHECK-T2-DSP: umaal r2, r3, r1, r0
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
-;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
-;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
-;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
;CHECK-NOT:umaal
;CHECK-V6-THUMB-NOT:umaal
;CHECK-V6M-THUMB-NOT: umaal
@@ -231,3 +210,188 @@ define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
%add2 = add i64 %add, %mul
ret i64 %add2
}
+
+define i64 @MACLongTest11(i16 %a, i16 %b, i64 %c) {
+;CHECK-LABEL: MACLongTest11:
+;CHECK-T2-DSP-NOT: sxth
+;CHECK-T2-DSP: smlalbb r2, r3
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
+;CHECK-V5TE-NOT: sxth
+;CHECK-V5TE: smlalbb r2, r3
+;CHECK-V5TE-NEXT: mov r0, r2
+;CHECK-V5TE-NEXT: mov r1, r3
+;CHECK-V7-LE-NOT: sxth
+;CHECK-V7-LE: smlalbb r2, r3
+;CHECK-V7-LE-NEXT: mov r0, r2
+;CHECK-V7-LE-NEXT: mov r1, r3
+;CHECK-V7-THUMB-BE: smlalbb r3, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r0, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3
+;CHECK-LE-NOT: smlalbb
+;CHECK-BE-NOT: smlalbb
+;CHECK-V6M-THUMB-NOT: smlalbb
+;CHECK-V7M-THUMB-NOT: smlalbb
+ %conv = sext i16 %a to i32
+ %conv1 = sext i16 %b to i32
+ %mul = mul nsw i32 %conv1, %conv
+ %conv2 = sext i32 %mul to i64
+ %add = add nsw i64 %conv2, %c
+ ret i64 %add
+}
+
+define i64 @MACLongTest12(i16 %b, i32 %t, i64 %c) {
+;CHECK-LABEL: MACLongTest12:
+;CHECK-T2-DSP-NOT: sxth
+;CHECK-T2-DSP-NOT: {{asr|lsr}}
+;CHECK-T2-DSP: smlalbt r2, r3, r0, r1
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
+;CHECK-T2-DSP-NOT: sxth
+;CHECK-V5TE-NOT: sxth
+;CHECK-V5TE-NOT: {{asr|lsr}}
+;CHECK-V5TE: smlalbt r2, r3, r0, r1
+;CHECK-V5TE-NEXT: mov r0, r2
+;CHECK-V5TE-NEXT: mov r1, r3
+;CHECK-V7-LE-NOT: sxth
+;CHECK-V7-LE-NOT: {{asr|lsr}}
+;CHECK-V7-LE: smlalbt r2, r3, r0, r1
+;CHECK-V7-LE-NEXT: mov r0, r2
+;CHECK-V7-LE-NEXT: mov r1, r3
+;CHECK-V7-THUMB-BE: smlalbt r3, r2,
+;CHECK-V7-THUMB-BE-NEXT: mov r0, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3
+;CHECK-LE-NOT: smlalbt
+;CHECK-BE-NOT: smlalbt
+;CHECK-V6M-THUMB-NOT: smlalbt
+;CHECK-V7M-THUMB-NOT: smlalbt
+ %conv0 = sext i16 %b to i32
+ %conv1 = ashr i32 %t, 16
+ %mul = mul nsw i32 %conv0, %conv1
+ %conv2 = sext i32 %mul to i64
+ %add = add nsw i64 %conv2, %c
+ ret i64 %add
+}
+
+define i64 @MACLongTest13(i32 %t, i16 %b, i64 %c) {
+;CHECK-LABEL: MACLongTest13:
+;CHECK-T2-DSP-NOT: sxth
+;CHECK-T2-DSP-NOT: {{asr|lsr}}
+;CHECK-T2-DSP: smlaltb r2, r3, r0, r1
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
+;CHECK-V5TE-NOT: sxth
+;CHECK-V5TE-NOT: {{asr|lsr}}
+;CHECK-V5TE: smlaltb r2, r3, r0, r1
+;CHECK-V5TE-NEXT: mov r0, r2
+;CHECK-V5TE-NEXT: mov r1, r3
+;CHECK-V7-LE-NOT: sxth
+;CHECK-V7-LE-NOT: {{asr|lsr}}
+;CHECK-V7-LE: smlaltb r2, r3, r0, r1
+;CHECK-V7-LE-NEXT: mov r0, r2
+;CHECK-V7-LE-NEXT: mov r1, r3
+;CHECK-V7-THUMB-BE: smlaltb r3, r2, r0, r1
+;CHECK-V7-THUMB-BE-NEXT: mov r0, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3
+;CHECK-LE-NOT: smlaltb
+;CHECK-BE-NOT: smlaltb
+;CHECK-V6M-THUMB-NOT: smlaltb
+;CHECK-V7M-THUMB-NOT: smlaltb
+ %conv0 = ashr i32 %t, 16
+ %conv1= sext i16 %b to i32
+ %mul = mul nsw i32 %conv0, %conv1
+ %conv2 = sext i32 %mul to i64
+ %add = add nsw i64 %conv2, %c
+ ret i64 %add
+}
+
+define i64 @MACLongTest14(i32 %a, i32 %b, i64 %c) {
+;CHECK-LABEL: MACLongTest14:
+;CHECK-T2-DSP-NOT: {{asr|lsr}}
+;CHECK-T2-DSP: smlaltt r2, r3,
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
+;CHECK-V5TE-NOT: {{asr|lsr}}
+;CHECK-V5TE: smlaltt r2, r3,
+;CHECK-V5TE-NEXT: mov r0, r2
+;CHECK-V5TE-NEXT: mov r1, r3
+;CHECK-V7-LE-NOT: {{asr|lsr}}
+;CHECK-V7-LE: smlaltt r2, r3,
+;CHECK-V7-LE-NEXT: mov r0, r2
+;CHECK-V7-LE-NEXT: mov r1, r3
+;CHECK-V7-THUMB-BE: smlaltt r3, r2,
+;CHECK-V7-THUMB-BE-NEXT: mov r0, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3
+;CHECK-LE-NOT: smlaltt
+;CHECK-BE-NOT: smlaltt
+;CHECK-V6M-THUMB-NOT: smlaltt
+;CHECK-V7M-THUMB-NOT: smlaltt
+ %conv0 = ashr i32 %a, 16
+ %conv1 = ashr i32 %b, 16
+ %mul = mul nsw i32 %conv1, %conv0
+ %conv2 = sext i32 %mul to i64
+ %add = add nsw i64 %conv2, %c
+ ret i64 %add
+}
+
+@global_b = external global i16, align 2
+;CHECK-LABEL: MACLongTest15
+;CHECK-T2-DSP-NOT: {{asr|lsr}}
+;CHECK-T2-DSP: smlaltb r2, r3, r0, r1
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
+;CHECK-V5TE-NOT: {{asr|lsr}}
+;CHECK-V5TE: smlaltb r2, r3, r0, r1
+;CHECK-V5TE-NEXT: mov r0, r2
+;CHECK-V5TE-NEXT: mov r1, r3
+;CHECK-V7-LE-NOT: {{asr|lsr}}
+;CHECK-V7-LE: smlaltb r2, r3, r0, r1
+;CHECK-V7-LE-NEXT: mov r0, r2
+;CHECK-V7-LE-NEXT: mov r1, r3
+;CHECK-V7-THUMB-BE: smlaltb r3, r2, r0, r1
+;CHECK-V7-THUMB-BE-NEXT: mov r0, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3
+;CHECK-LE-NOT: smlaltb
+;CHECK-BE-NOT: smlaltb
+;CHECK-V6M-THUMB-NOT: smlaltb
+;CHECK-V7M-THUMB-NOT: smlaltb
+define i64 @MACLongTest15(i32 %t, i64 %acc) {
+entry:
+ %0 = load i16, i16* @global_b, align 2
+ %conv = sext i16 %0 to i32
+ %shr = ashr i32 %t, 16
+ %mul = mul nsw i32 %shr, %conv
+ %conv1 = sext i32 %mul to i64
+ %add = add nsw i64 %conv1, %acc
+ ret i64 %add
+}
+
+;CHECK-LABEL: MACLongTest16
+;CHECK-T2-DSP-NOT: {{asr|lsr}}
+;CHECK-T2-DSP: smlalbt r2, r3, r1, r0
+;CHECK-T2-DSP-NEXT: mov r0, r2
+;CHECK-T2-DSP-NEXT: mov r1, r3
+;CHECK-V5TE-NOT: {{asr|lsr}}
+;CHECK-V5TE: smlalbt r2, r3, r1, r0
+;CHECK-V5TE-NEXT: mov r0, r2
+;CHECK-V5TE-NEXT: mov r1, r3
+;CHECK-V7-LE: smlalbt r2, r3, r1, r0
+;CHECK-V7-LE-NEXT: mov r0, r2
+;CHECK-V7-LE-NEXT: mov r1, r3
+;CHECK-V7-THUMB-BE: smlalbt r3, r2, r1, r0
+;CHECK-V7-THUMB-BE-NEXT: mov r0, r2
+;CHECK-V7-THUMB-BE-NEXT: mov r1, r3
+;CHECK-LE-NOT: smlalbt
+;CHECK-BE-NOT: smlalbt
+;CHECK-V6M-THUMB-NOT: smlalbt
+;CHECK-V7M-THUMB-NOT: smlalbt
+define i64 @MACLongTest16(i32 %t, i64 %acc) {
+entry:
+ %0 = load i16, i16* @global_b, align 2
+ %conv = sext i16 %0 to i32
+ %shr = ashr i32 %t, 16
+ %mul = mul nsw i32 %conv, %shr
+ %conv1 = sext i32 %mul to i64
+ %add = add nsw i64 %conv1, %acc
+ ret i64 %add
+}
diff --git a/test/CodeGen/ARM/lowerMUL-newload.ll b/test/CodeGen/ARM/lowerMUL-newload.ll
new file mode 100644
index 0000000000000..93d765cba1168
--- /dev/null
+++ b/test/CodeGen/ARM/lowerMUL-newload.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=krait | FileCheck %s
+
+define void @func1(i16* %a, i16* %b, i16* %c) {
+entry:
+; The test case trying to vectorize the pseudo code below.
+; a[i] = b[i] + c[i];
+; b[i] = a[i] * c[i];
+; a[i] = b[i] + a[i] * c[i];
+;
+; Checking that vector load a[i] for "a[i] = b[i] + a[i] * c[i]" is
+; scheduled before the first vector store to "a[i] = b[i] + c[i]".
+; Checking that there is no vector load a[i] scheduled between the vector
+; stores to a[i], otherwise the load of a[i] will be polluted by the first
+; vector store to a[i].
+;
+; This test case check that the chain information is updated during
+; lowerMUL for the new created Load SDNode.
+
+; CHECK: vldr {{.*}} [r0, #16]
+; CHECK: vstr {{.*}} [r0, #16]
+; CHECK-NOT: vldr {{.*}} [r0, #16]
+; CHECK: vstr {{.*}} [r0, #16]
+
+ %scevgep0 = getelementptr i16, i16* %a, i32 8
+ %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
+ %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8
+ %scevgep1 = getelementptr i16, i16* %b, i32 8
+ %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
+ %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8
+ %0 = zext <4 x i16> %vec1 to <4 x i32>
+ %scevgep2 = getelementptr i16, i16* %c, i32 8
+ %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
+ %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8
+ %1 = sext <4 x i16> %vec2 to <4 x i32>
+ %vec3 = add <4 x i32> %1, %0
+ %2 = trunc <4 x i32> %vec3 to <4 x i16>
+ %scevgep3 = getelementptr i16, i16* %a, i32 8
+ %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
+ store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8
+ %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
+ %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8
+ %3 = sext <4 x i16> %vec4 to <4 x i32>
+ %vec5 = mul <4 x i32> %3, %vec3
+ %4 = trunc <4 x i32> %vec5 to <4 x i16>
+ %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
+ store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8
+ %5 = sext <4 x i16> %vec0 to <4 x i32>
+ %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
+ %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8
+ %6 = sext <4 x i16> %vec6 to <4 x i32>
+ %vec7 = mul <4 x i32> %6, %5
+ %vec8 = add <4 x i32> %vec7, %vec5
+ %7 = trunc <4 x i32> %vec8 to <4 x i16>
+ %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
+ store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8
+ ret void
+}
+
+define void @func2(i16* %a, i16* %b, i16* %c) {
+entry:
+; The test case trying to vectorize the pseudo code below.
+; a[i] = b[i] + c[i];
+; b[i] = a[i] * c[i];
+; a[i] = b[i] + a[i] * c[i] + a[i];
+;
+; Checking that vector load a[i] for "a[i] = b[i] + a[i] * c[i] + a[i]"
+; is scheduled before the first vector store to "a[i] = b[i] + c[i]".
+; Checking that there is no vector load a[i] scheduled between the first
+; vector store to a[i] and the vector add of a[i], otherwise the load of
+; a[i] will be polluted by the first vector store to a[i].
+;
+; This test case check that both the chain and value of the new created
+; Load SDNode are updated during lowerMUL.
+
+; CHECK: vldr {{.*}} [r0, #16]
+; CHECK: vstr {{.*}} [r0, #16]
+; CHECK-NOT: vldr {{.*}} [r0, #16]
+; CHECK: vaddw.s16
+; CHECK: vstr {{.*}} [r0, #16]
+
+ %scevgep0 = getelementptr i16, i16* %a, i32 8
+ %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
+ %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8
+ %scevgep1 = getelementptr i16, i16* %b, i32 8
+ %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
+ %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8
+ %0 = zext <4 x i16> %vec1 to <4 x i32>
+ %scevgep2 = getelementptr i16, i16* %c, i32 8
+ %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
+ %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8
+ %1 = sext <4 x i16> %vec2 to <4 x i32>
+ %vec3 = add <4 x i32> %1, %0
+ %2 = trunc <4 x i32> %vec3 to <4 x i16>
+ %scevgep3 = getelementptr i16, i16* %a, i32 8
+ %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
+ store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8
+ %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
+ %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8
+ %3 = sext <4 x i16> %vec4 to <4 x i32>
+ %vec5 = mul <4 x i32> %3, %vec3
+ %4 = trunc <4 x i32> %vec5 to <4 x i16>
+ %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
+ store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8
+ %5 = sext <4 x i16> %vec0 to <4 x i32>
+ %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
+ %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8
+ %6 = sext <4 x i16> %vec6 to <4 x i32>
+ %vec7 = mul <4 x i32> %6, %5
+ %vec8 = add <4 x i32> %vec7, %vec5
+ %vec9 = add <4 x i32> %vec8, %5
+ %7 = trunc <4 x i32> %vec9 to <4 x i16>
+ %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
+ store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/mature-mc-support.ll b/test/CodeGen/ARM/mature-mc-support.ll
index 0a7e5b91adc5f..f89657dd81ac3 100644
--- a/test/CodeGen/ARM/mature-mc-support.ll
+++ b/test/CodeGen/ARM/mature-mc-support.ll
@@ -9,4 +9,4 @@
module asm " .this_directive_is_very_unlikely_to_exist"
-; CHECK: LLVM ERROR: Error parsing inline asm
+; CHECK: error: unknown directive
diff --git a/test/CodeGen/ARM/misched-fp-basic.ll b/test/CodeGen/ARM/misched-fp-basic.ll
new file mode 100644
index 0000000000000..27ad2cec34fd6
--- /dev/null
+++ b/test/CodeGen/ARM/misched-fp-basic.ll
@@ -0,0 +1,69 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
+; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
+; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
+; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
+;
+; Check the latency of instructions for processors with sched-models
+;
+; Function Attrs: norecurse nounwind readnone
+define i32 @foo(float %a, float %b, float %c, i32 %d) local_unnamed_addr #0 {
+entry:
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK_A9: VADDS
+; CHECK_SWIFT: VADDfd
+; CHECK_R52: VADDS
+; CHECK_A9: Latency : 5
+; CHECK_SWIFT: Latency : 4
+; CHECK_R52: Latency : 6
+;
+; CHECK_A9: VMULS
+; CHECK_SWIFT: VMULfd
+; CHECK_R52: VMULS
+; CHECK_SWIFT: Latency : 4
+; CHECK_A9: Latency : 6
+; CHECK_R52: Latency : 6
+;
+; CHECK: VDIVS
+; CHECK_SWIFT: Latency : 17
+; CHECK_A9: Latency : 16
+; CHECK_R52: Latency : 7
+;
+; CHECK: VCVTDS
+; CHECK_SWIFT: Latency : 4
+; CHECK_A9: Latency : 5
+; CHECK_R52: Latency : 6
+;
+; CHECK: VADDD
+; CHECK_SWIFT: Latency : 6
+; CHECK_A9: Latency : 5
+; CHECK_R52: Latency : 6
+;
+; CHECK: VMULD
+; CHECK_SWIFT: Latency : 6
+; CHECK_A9: Latency : 7
+; CHECK_R52: Latency : 6
+;
+; CHECK: VDIVD
+; CHECK_SWIFT: Latency : 32
+; CHECK_A9: Latency : 26
+; CHECK_R52: Latency : 17
+;
+; CHECK: VTOSIZD
+; CHECK_SWIFT: Latency : 4
+; CHECK_A9: Latency : 5
+; CHECK_R52: Latency : 6
+;
+ %add = fadd float %a, %b
+ %mul = fmul float %add, %add
+ %div = fdiv float %mul, %b
+ %conv1 = fpext float %div to double
+ %add3 = fadd double %conv1, %conv1
+ %mul4 = fmul double %add3, %add3
+ %div5 = fdiv double %mul4, %conv1
+ %conv6 = fptosi double %div5 to i32
+ ret i32 %conv6
+}
diff --git a/test/CodeGen/ARM/misched-int-basic-thumb2.mir b/test/CodeGen/ARM/misched-int-basic-thumb2.mir
new file mode 100644
index 0000000000000..86ef1e26f6368
--- /dev/null
+++ b/test/CodeGen/ARM/misched-int-basic-thumb2.mir
@@ -0,0 +1,175 @@
+# Basic machine sched model test for Thumb2 int instructions
+# RUN: llc -o /dev/null %s -mtriple=thumbv7-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
+# RUN: llc -o /dev/null %s -mtriple=thumbv7--eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
+# RUN: llc -o /dev/null %s -mtriple=thumbv8r-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
+# REQUIRES: asserts
+--- |
+ ; ModuleID = 'foo.ll'
+ source_filename = "foo.ll"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv7---eabi"
+
+ @g1 = common global i32 0, align 4
+ @g2 = common global i32 0, align 4
+
+ define i64 @foo(i16 signext %a, i16 signext %b) {
+ entry:
+ %0 = load i32, i32* @g1, align 4
+ %1 = load i32, i32* @g2, align 4
+ %2 = add nuw nsw i32 %0, %0
+ %3 = sdiv i32 %2, %1
+ store i32 %3, i32* @g1, align 4
+ %d = mul nsw i16 %a, %a
+ %e = mul nsw i16 %b, %b
+ %f = add nuw nsw i16 %e, %d
+ %c = zext i16 %f to i32
+ %mul8 = mul nsw i32 %c, %3
+ %mul9 = mul nsw i32 %mul8, %mul8
+ %add10 = add nuw nsw i32 %mul9, %mul8
+ %conv1130 = zext i32 %add10 to i64
+ %mul12 = mul nuw nsw i64 %conv1130, %conv1130
+ %mul13 = mul nsw i64 %mul12, %mul12
+ %add14 = add nuw nsw i64 %mul13, %mul12
+ ret i64 %add14
+ }
+#
+# CHECK: ********** MI Scheduling **********
+# CHECK: SU(2): %vreg2<def> = t2MOVi32imm <ga:@g1>; rGPR:%vreg2
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 2
+# CHECK_R52: Latency : 2
+#
+# CHECK: SU(3): %vreg3<def> = t2LDRi12 %vreg2, 0, pred:14, pred:%noreg; mem:LD4[@g1](dereferenceable) rGPR:%vreg3,%vreg2
+# CHECK_A9: Latency : 1
+# CHECK_SWIFT: Latency : 3
+# CHECK_R52: Latency : 4
+#
+# CHECK : SU(6): %vreg6<def> = t2ADDrr %vreg3, %vreg3, pred:14, pred:%noreg, opt:%noreg; rGPR:%vreg6,%vreg3,%vreg3
+# CHECK_A9: Latency : 1
+# CHECK_SWIFT: Latency : 1
+# CHECK_R52: Latency : 3
+
+# CHECK: SU(7): %vreg7<def> = t2SDIV %vreg6, %vreg5, pred:14, pred:%noreg; rGPR:%vreg7,%vreg6,%vreg5
+# CHECK_A9: Latency : 0
+# CHECK_SWIFT: Latency : 14
+# CHECK_R52: Latency : 8
+
+# CHECK: SU(8): t2STRi12 %vreg7, %vreg2, 0, pred:14, pred:%noreg; mem:ST4[@g1] rGPR:%vreg7,%vreg2
+# CHECK_A9: Latency : 1
+# CHECK_SWIFT: Latency : 0
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(9): %vreg8<def> = t2SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; rGPR:%vreg8,%vreg1,%vreg1
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(10): %vreg9<def> = t2SMLABB %vreg0, %vreg0, %vreg8, pred:14, pred:%noreg; rGPR:%vreg9,%vreg0,%vreg0,%vreg8
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(11): %vreg10<def> = t2UXTH %vreg9, 0, pred:14, pred:%noreg; rGPR:%vreg10,%vreg9
+# CHECK_A9: Latency : 1
+# CHECK_SWIFT: Latency : 1
+# CHECK_R52: Latency : 3
+#
+# CHECK: SU(12): %vreg11<def> = t2MUL %vreg10, %vreg7, pred:14, pred:%noreg; rGPR:%vreg11,%vreg10,%vreg7
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(13): %vreg12<def> = t2MLA %vreg11, %vreg11, %vreg11, pred:14, pred:%noreg; rGPR:%vreg12,%vreg11,%vreg11,%vreg11
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(14): %vreg13<def>, %vreg14<def> = t2UMULL %vreg12, %vreg12, pred:14, pred:%noreg; rGPR:%vreg13,%vreg14,%vreg12,%vreg12
+# CHECK_A9: Latency : 3
+# CHECK_SWIFT: Latency : 5
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(18): %vreg19<def,tied4>, %vreg20<def,tied5> = t2UMLAL %vreg12, %vreg12, %vreg19<tied0>, %vreg20<tied1>, pred:14, pred:%noreg; rGPR:%vreg19,%vreg20,%vreg12,%vreg12,%vreg20
+# CHECK_A9: Latency : 3
+# CHECK_SWIFT: Latency : 7
+# CHECK_R52: Latency : 4
+# CHECK: ** ScheduleDAGMILive::schedule picking next node
+...
+---
+name: foo
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: rgpr }
+ - { id: 1, class: rgpr }
+ - { id: 2, class: rgpr }
+ - { id: 3, class: rgpr }
+ - { id: 4, class: rgpr }
+ - { id: 5, class: rgpr }
+ - { id: 6, class: rgpr }
+ - { id: 7, class: rgpr }
+ - { id: 8, class: rgpr }
+ - { id: 9, class: rgpr }
+ - { id: 10, class: rgpr }
+ - { id: 11, class: rgpr }
+ - { id: 12, class: rgpr }
+ - { id: 13, class: rgpr }
+ - { id: 14, class: rgpr }
+ - { id: 15, class: rgpr }
+ - { id: 16, class: rgpr }
+ - { id: 17, class: rgpr }
+ - { id: 18, class: rgpr }
+ - { id: 19, class: rgpr }
+ - { id: 20, class: rgpr }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %r0, %r1
+
+ %1 = COPY %r1
+ %0 = COPY %r0
+ %2 = t2MOVi32imm @g1
+ %3 = t2LDRi12 %2, 0, 14, _ :: (dereferenceable load 4 from @g1)
+ %4 = t2MOVi32imm @g2
+ %5 = t2LDRi12 %4, 0, 14, _ :: (dereferenceable load 4 from @g2)
+ %6 = t2ADDrr %3, %3, 14, _, _
+ %7 = t2SDIV %6, %5, 14, _
+ t2STRi12 %7, %2, 0, 14, _ :: (store 4 into @g1)
+ %8 = t2SMULBB %1, %1, 14, _
+ %9 = t2SMLABB %0, %0, %8, 14, _
+ %10 = t2UXTH %9, 0, 14, _
+ %11 = t2MUL %10, %7, 14, _
+ %12 = t2MLA %11, %11, %11, 14, _
+ %13, %14 = t2UMULL %12, %12, 14, _
+ %19, %16 = t2UMULL %13, %13, 14, _
+ %17 = t2MLA %13, %14, %16, 14, _
+ %20 = t2MLA %13, %14, %17, 14, _
+ %19, %20 = t2UMLAL %12, %12, %19, %20, 14, _
+ %r0 = COPY %19
+ %r1 = COPY %20
+ tBX_RET 14, _, implicit %r0, implicit %r1
+
+...
diff --git a/test/CodeGen/ARM/misched-int-basic.mir b/test/CodeGen/ARM/misched-int-basic.mir
new file mode 100644
index 0000000000000..f237c0a07b2ed
--- /dev/null
+++ b/test/CodeGen/ARM/misched-int-basic.mir
@@ -0,0 +1,128 @@
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
+# REQUIRES: asserts
+--- |
+ ; ModuleID = 'foo.ll'
+ source_filename = "foo.ll"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "arm---eabi"
+
+ define i64 @foo(i16 signext %a, i16 signext %b) {
+ entry:
+ %d = mul nsw i16 %a, %a
+ %e = mul nsw i16 %b, %b
+ %f = add nuw nsw i16 %e, %d
+ %c = zext i16 %f to i32
+ %mul8 = mul nsw i32 %c, %c
+ %mul9 = mul nsw i32 %mul8, %mul8
+ %add10 = add nuw nsw i32 %mul9, %mul8
+ %conv1130 = zext i32 %add10 to i64
+ %mul12 = mul nuw nsw i64 %conv1130, %conv1130
+ %mul13 = mul nsw i64 %mul12, %mul12
+ %add14 = add nuw nsw i64 %mul13, %mul12
+ ret i64 %add14
+ }
+
+# CHECK: ********** MI Scheduling **********
+# CHECK: SU(2): %vreg2<def> = SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; GPR:%vreg2,%vreg1,%vreg1
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(3): %vreg3<def> = SMLABB %vreg0, %vreg0, %vreg2, pred:14, pred:%noreg; GPRnopc:%vreg3,%vreg0,%vreg0 GPR:%vreg2
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(4): %vreg4<def> = UXTH %vreg3, 0, pred:14, pred:%noreg; GPRnopc:%vreg4,%vreg3
+# CHECK_A9: Latency : 1
+# CHECK_SWIFT: Latency : 1
+# CHECK_R52: Latency : 3
+#
+# CHECK: SU(5): %vreg5<def> = MUL %vreg4, %vreg4, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg5,%vreg4,%vreg4
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(6): %vreg6<def> = MLA %vreg5, %vreg5, %vreg5, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg6,%vreg5,%vreg5,%vreg5
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(7): %vreg7<def>, %vreg8<def> = UMULL %vreg6, %vreg6, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg7,%vreg8,%vreg6,%vreg6
+# CHECK_A9: Latency : 3
+# CHECK_SWIFT: Latency : 5
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(11): %vreg13<def,tied4>, %vreg14<def,tied5> = UMLAL %vreg6, %vreg6, %vreg13<tied0>, %vreg14<tied1>, pred:14, pred:%noreg, opt:%noreg; GPR:%vreg13 GPRnopc:%vreg14,%vreg6,%vreg6
+# CHECK_SWIFT: Latency : 7
+# CHECK_A9: Latency : 3
+# CHECK_R52: Latency : 4
+# CHECK: ** ScheduleDAGMILive::schedule picking next node
+...
+---
+name: foo
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnopc }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+ - { id: 3, class: gprnopc }
+ - { id: 4, class: gprnopc }
+ - { id: 5, class: gprnopc }
+ - { id: 6, class: gprnopc }
+ - { id: 7, class: gprnopc }
+ - { id: 8, class: gprnopc }
+ - { id: 9, class: gpr }
+ - { id: 10, class: gprnopc }
+ - { id: 11, class: gprnopc }
+ - { id: 12, class: gprnopc }
+ - { id: 13, class: gpr }
+ - { id: 14, class: gprnopc }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %r0, %r1
+
+ %1 = COPY %r1
+ %0 = COPY %r0
+ %2 = SMULBB %1, %1, 14, _
+ %3 = SMLABB %0, %0, %2, 14, _
+ %4 = UXTH %3, 0, 14, _
+ %5 = MUL %4, %4, 14, _, _
+ %6 = MLA %5, %5, %5, 14, _, _
+ %7, %8 = UMULL %6, %6, 14, _, _
+ %13, %10 = UMULL %7, %7, 14, _, _
+ %11 = MLA %7, %8, %10, 14, _, _
+ %14 = MLA %7, %8, %11, 14, _, _
+ %13, %14 = UMLAL %6, %6, %13, %14, 14, _, _
+ %r0 = COPY %13
+ %r1 = COPY %14
+ BX_RET 14, _, implicit %r0, implicit %r1
+
+...
diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll
index da9b698f20996..f51582031bd59 100644
--- a/test/CodeGen/ARM/movt.ll
+++ b/test/CodeGen/ARM/movt.ll
@@ -2,10 +2,15 @@
; rdar://7317664
; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8m.base -mcpu=cortex-m23 %s -o - | FileCheck %s --check-prefix=NOMOVT
+; RUN: llc -mtriple=thumbv8m.base -mcpu=cortex-m33 %s -o - | FileCheck %s
define i32 @t(i32 %X) nounwind {
; CHECK-LABEL: t:
; CHECK: movt r{{[0-9]}}, #65535
+; NOMOVT-LABEL: t:
+; NOMOVT-NOT: movt r{{[0-9]}}, #65535
+; NOMOVT: ldr r{{[0-9]}}, .LCP
entry:
%0 = or i32 %X, -65536
ret i32 %0
@@ -14,6 +19,9 @@ entry:
define i32 @t2(i32 %X) nounwind {
; CHECK-LABEL: t2:
; CHECK: movt r{{[0-9]}}, #65534
+; NOMOVT-LABEL: t2:
+; NOMOVT-NOT: movt r{{[0-9]}}, #65534
+; NOMOVT: ldr r{{[0-9]}}, .LCP
entry:
%0 = or i32 %X, -131072
%1 = and i32 %0, -65537
diff --git a/test/CodeGen/ARM/msr-it-block.ll b/test/CodeGen/ARM/msr-it-block.ll
index 0f9ff6b29d795..8d4ddc3a49853 100644
--- a/test/CodeGen/ARM/msr-it-block.ll
+++ b/test/CodeGen/ARM/msr-it-block.ll
@@ -20,8 +20,8 @@ write_reg:
; V6M: msr apsr, {{r[0-9]+}}
; V7M: msr apsr_nzcvq, {{r[0-9]+}}
; V7M: msr apsr_nzcvq, {{r[0-9]+}}
-; V7A: msr APSR_nzcvqg, {{r[0-9]+}}
-; V7A: msr APSR_nzcvqg, {{r[0-9]+}}
+; V7A: msr APSR_nzcvq, {{r[0-9]+}}
+; V7A: msr APSR_nzcvq, {{r[0-9]+}}
br label %exit
exit:
@@ -41,8 +41,8 @@ write_reg:
; V6M: msr apsr, {{r[0-9]+}}
; V7M: msr apsr_nzcvq, {{r[0-9]+}}
; V7M: msr apsr_nzcvq, {{r[0-9]+}}
-; V7A: msr APSR_nzcvqg, {{r[0-9]+}}
-; V7A: msr APSR_nzcvqg, {{r[0-9]+}}
+; V7A: msr APSR_nzcvq, {{r[0-9]+}}
+; V7A: msr APSR_nzcvq, {{r[0-9]+}}
br label %exit
exit:
diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll
index d32e7b78879ba..109d09582afdc 100644
--- a/test/CodeGen/ARM/neon_vabs.ll
+++ b/test/CodeGen/ARM/neon_vabs.ll
@@ -1,8 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
define <4 x i32> @test1(<4 x i32> %a) nounwind {
; CHECK-LABEL: test1:
-; CHECK: vabs.s32 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
@@ -11,7 +18,13 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind {
define <4 x i32> @test2(<4 x i32> %a) nounwind {
; CHECK-LABEL: test2:
-; CHECK: vabs.s32 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sge <4 x i32> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
@@ -20,7 +33,13 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind {
define <8 x i16> @test3(<8 x i16> %a) nounwind {
; CHECK-LABEL: test3:
-; CHECK: vabs.s16 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <8 x i16> zeroinitializer, %a
%b = icmp sgt <8 x i16> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
@@ -29,7 +48,13 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind {
define <16 x i8> @test4(<16 x i8> %a) nounwind {
; CHECK-LABEL: test4:
-; CHECK: vabs.s8 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s8 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <16 x i8> zeroinitializer, %a
%b = icmp slt <16 x i8> %a, zeroinitializer
%abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
@@ -38,7 +63,13 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind {
define <4 x i32> @test5(<4 x i32> %a) nounwind {
; CHECK-LABEL: test5:
-; CHECK: vabs.s32 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sle <4 x i32> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
@@ -47,7 +78,11 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind {
define <2 x i32> @test6(<2 x i32> %a) nounwind {
; CHECK-LABEL: test6:
-; CHECK: vabs.s32 d
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <2 x i32> zeroinitializer, %a
%b = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
%abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
@@ -56,7 +91,11 @@ define <2 x i32> @test6(<2 x i32> %a) nounwind {
define <2 x i32> @test7(<2 x i32> %a) nounwind {
; CHECK-LABEL: test7:
-; CHECK: vabs.s32 d
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <2 x i32> zeroinitializer, %a
%b = icmp sge <2 x i32> %a, zeroinitializer
%abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
@@ -65,7 +104,11 @@ define <2 x i32> @test7(<2 x i32> %a) nounwind {
define <4 x i16> @test8(<4 x i16> %a) nounwind {
; CHECK-LABEL: test8:
-; CHECK: vabs.s16 d
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <4 x i16> zeroinitializer, %a
%b = icmp sgt <4 x i16> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
@@ -74,7 +117,11 @@ define <4 x i16> @test8(<4 x i16> %a) nounwind {
define <8 x i8> @test9(<8 x i8> %a) nounwind {
; CHECK-LABEL: test9:
-; CHECK: vabs.s8 d
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s8 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <8 x i8> zeroinitializer, %a
%b = icmp slt <8 x i8> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a
@@ -83,7 +130,11 @@ define <8 x i8> @test9(<8 x i8> %a) nounwind {
define <2 x i32> @test10(<2 x i32> %a) nounwind {
; CHECK-LABEL: test10:
-; CHECK: vabs.s32 d
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vabs.s32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1neg = sub <2 x i32> zeroinitializer, %a
%b = icmp sle <2 x i32> %a, zeroinitializer
%abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a
@@ -95,7 +146,13 @@ define <2 x i32> @test10(<2 x i32> %a) nounwind {
define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind {
; CHECK-LABEL: test11:
-; CHECK: vabdl.u16 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u16 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%zext1 = zext <4 x i16> %a to <4 x i32>
%zext2 = zext <4 x i16> %b to <4 x i32>
%diff = sub <4 x i32> %zext1, %zext2
@@ -106,7 +163,13 @@ define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind {
}
define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind {
; CHECK-LABEL: test12:
-; CHECK: vabdl.u8 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u8 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%zext1 = zext <8 x i8> %a to <8 x i16>
%zext2 = zext <8 x i8> %b to <8 x i16>
%diff = sub <8 x i16> %zext1, %zext2
@@ -118,7 +181,13 @@ define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind {
define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind {
; CHECK-LABEL: test13:
-; CHECK: vabdl.u32 q
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d16, r2, r3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vabdl.u32 q8, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
%zext1 = zext <2 x i32> %a to <2 x i64>
%zext2 = zext <2 x i32> %b to <2 x i64>
%diff = sub <2 x i64> %zext1, %zext2
diff --git a/test/CodeGen/ARM/no-cmov2bfi.ll b/test/CodeGen/ARM/no-cmov2bfi.ll
new file mode 100644
index 0000000000000..c8b5120489054
--- /dev/null
+++ b/test/CodeGen/ARM/no-cmov2bfi.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=thumbv7 | FileCheck --check-prefix=CHECK-NOBFI %s
+
+declare zeroext i1 @dummy()
+
+define i8 @test(i8 %a1, i1 %c) {
+; CHECK-NOBFI-NOT: bfi
+; CHECK-NOBFI: bl dummy
+; CHECK-NOBFI: cmp r0, #0
+; CHECK-NOBFI: it ne
+; CHECK-NOBFI: orrne [[REG:r[0-9]+]], [[REG]], #8
+; CHECK-NOBFI: mov r0, [[REG]]
+
+ %1 = and i8 %a1, -9
+ %2 = select i1 %c, i8 %1, i8 %a1
+ %3 = tail call zeroext i1 @dummy()
+ %4 = or i8 %2, 8
+ %ret = select i1 %3, i8 %4, i8 %2
+ ret i8 %ret
+}
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
index ff85052175c85..568f7572b32e9 100644
--- a/test/CodeGen/ARM/phi.ll
+++ b/test/CodeGen/ARM/phi.ll
@@ -1,5 +1,4 @@
; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
-; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s
; <rdar://problem/8686347>
diff --git a/test/CodeGen/ARM/pr32545.ll b/test/CodeGen/ARM/pr32545.ll
new file mode 100644
index 0000000000000..5bfb01b45983b
--- /dev/null
+++ b/test/CodeGen/ARM/pr32545.ll
@@ -0,0 +1,22 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabi"
+
+; CHECK: vld1.16 {[[DREG:d[0-9]+]][0]}, {{.*}}
+; CHECK: vmovl.u8 [[QREG:q[0-9]+]], [[DREG]]
+; CHECK: vmovl.u16 [[QREG]], [[DREG]]
+
+define void @f(i32 %dstStride, i8* %indvars.iv, <2 x i8>* %zz) {
+entry:
+ br label %for.body
+
+for.body:
+ %tmp = load <2 x i8>, <2 x i8>* %zz, align 1
+ %tmp1 = extractelement <2 x i8> %tmp, i32 0
+ %.lhs.rhs = zext i8 %tmp1 to i32
+ call void @g(i32 %.lhs.rhs)
+ br label %for.body
+}
+
+declare void @g(i32)
diff --git a/test/CodeGen/ARM/prera-ldst-aliasing.mir b/test/CodeGen/ARM/prera-ldst-aliasing.mir
new file mode 100644
index 0000000000000..ce37106ed8d2f
--- /dev/null
+++ b/test/CodeGen/ARM/prera-ldst-aliasing.mir
@@ -0,0 +1,40 @@
+# RUN: llc -run-pass arm-prera-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "thumbv7---eabi"
+
+ define void @ldrd_strd_aa(i32* noalias nocapture %x, i32* noalias nocapture readonly %y) {
+ entry:
+ %0 = load i32, i32* %y, align 4
+ store i32 %0, i32* %x, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %y, i32 1
+ %1 = load i32, i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %x, i32 1
+ store i32 %1, i32* %arrayidx3, align 4
+ ret void
+ }
+...
+---
+name: ldrd_strd_aa
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+body: |
+ bb.0.entry:
+ liveins: %r0, %r1
+
+ %1 : gpr = COPY %r1
+ %0 : gpr = COPY %r0
+ %2 : gpr = t2LDRi12 %1, 0, 14, _ :: (load 4 from %ir.y)
+ t2STRi12 killed %2, %0, 0, 14, _ :: (store 4 into %ir.x)
+ %3 : gpr = t2LDRi12 %1, 4, 14, _ :: (load 4 from %ir.arrayidx2)
+ t2STRi12 killed %3, %0, 4, 14, _ :: (store 4 into %ir.arrayidx3)
+ ; CHECK: t2LDRi12
+ ; CHECK-NEXT: t2LDRi12
+ ; CHECK-NEXT: t2STRi12
+ ; CHECK-NEXT: t2STRi12
+ tBX_RET 14, _
+
+...
+
diff --git a/test/CodeGen/ARM/prera-ldst-insertpt.mir b/test/CodeGen/ARM/prera-ldst-insertpt.mir
new file mode 100644
index 0000000000000..eafcc7c36d334
--- /dev/null
+++ b/test/CodeGen/ARM/prera-ldst-insertpt.mir
@@ -0,0 +1,105 @@
+# RUN: llc -run-pass arm-prera-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "thumbv7---eabi"
+
+ define void @a(i32* nocapture %x, i32 %y, i32 %z) {
+ entry:
+ ret void
+ }
+
+ define void @b(i32* nocapture %x, i32 %y, i32 %z) {
+ entry:
+ ret void
+ }
+...
+---
+# CHECK-LABEL: name: a
+name: a
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+ - { reg: '%r2', virtual-reg: '%2' }
+body: |
+ bb.0.entry:
+ liveins: %r0, %r1, %r2
+
+ %2 : rgpr = COPY %r2
+ %1 : rgpr = COPY %r1
+ %0 : gpr = COPY %r0
+ %3 : rgpr = t2MUL %2, %2, 14, _
+ %4 : rgpr = t2MUL %1, %1, 14, _
+ %5 : rgpr = t2MOVi32imm -858993459
+ %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _
+ %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, _
+ t2STRi12 %1, %0, 0, 14, _ :: (store 4)
+ %10 : rgpr = t2LSLri %2, 1, 14, _, _
+ t2STRi12 killed %10, %0, 4, 14, _ :: (store 4)
+
+ ; Make sure we move the paired stores next to each other, and
+ ; insert them in an appropriate location.
+ ; CHECK: t2STRi12 %1,
+ ; CHECK-NEXT: t2STRi12 killed %10,
+ ; CHECK-NEXT: t2MOVi
+ ; CHECK-NEXT: t2ADDrs
+
+ %11 : rgpr = t2MOVi 55, 14, _, _
+ %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _
+ t2STRi12 killed %12, %0, 16, 14, _ :: (store 4)
+ %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _
+ t2STRi12 killed %13, %0, 20, 14, _ :: (store 4)
+
+ ; Make sure we move the paired stores next to each other.
+ ; CHECK: t2STRi12 killed %12,
+ ; CHECK-NEXT: t2STRi12 killed %13,
+
+ tBX_RET 14, _
+---
+# CHECK-LABEL: name: b
+name: b
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+ - { reg: '%r1', virtual-reg: '%1' }
+ - { reg: '%r2', virtual-reg: '%2' }
+body: |
+ bb.0.entry:
+ liveins: %r0, %r1, %r2
+
+ %2 : rgpr = COPY %r2
+ %1 : rgpr = COPY %r1
+ %0 : gpr = COPY %r0
+ t2STRi12 %1, %0, 0, 14, _ :: (store 4)
+ %10 : rgpr = t2LSLri %2, 1, 14, _, _
+ t2STRi12 killed %10, %0, 4, 14, _ :: (store 4)
+ %3 : rgpr = t2MUL %2, %2, 14, _
+ t2STRi12 %3, %0, 8, 14, _ :: (store 4)
+
+ ; Make sure we move the paired stores next to each other, and
+ ; insert them in an appropriate location.
+ ; CHECK: t2STRi12 {{.*}}, 0
+ ; CHECK-NEXT: t2STRi12 {{.*}}, 4
+ ; CHECK-NEXT: t2STRi12 {{.*}}, 8
+ ; CHECK-NEXT: t2MUL
+ ; CHECK-NEXT: t2MOVi32imm
+
+ %4 : rgpr = t2MUL %1, %1, 14, _
+ %5 : rgpr = t2MOVi32imm -858993459
+ %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _
+ %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, _
+ %10 : rgpr = t2LSLri %2, 1, 14, _, _
+ %11 : rgpr = t2MOVi 55, 14, _, _
+ %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _
+ t2STRi12 killed %12, %0, 16, 14, _ :: (store 4)
+ %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _
+ t2STRi12 killed %13, %0, 20, 14, _ :: (store 4)
+
+ ; Make sure we move the paired stores next to each other.
+ ; CHECK: t2STRi12 {{.*}}, 16
+ ; CHECK-NEXT: t2STRi12 {{.*}}, 20
+
+ tBX_RET 14, _
+
+...
diff --git a/test/CodeGen/ARM/rbit.ll b/test/CodeGen/ARM/rbit.ll
index a2bfeca75526d..c8badfb32370c 100644
--- a/test/CodeGen/ARM/rbit.ll
+++ b/test/CodeGen/ARM/rbit.ll
@@ -10,7 +10,8 @@ entry:
; CHECK-LABEL: rbit_constant
; CHECK: mov r0, #0
-; CHECK: rbit r0, r0
+; CHECK-NOT: rbit
+; CHECK: bx lr
define i32 @rbit_constant() {
entry:
%rbit.i = call i32 @llvm.arm.rbit(i32 0)
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index f95f97105b9fc..a36526ff1fb03 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s
define i32 @test1(i32 %X) nounwind {
-; CHECK: test1
+; CHECK-LABEL: test1
; CHECK: rev16 r0, r0
%tmp1 = lshr i32 %X, 8
%X15 = bitcast i32 %X to i32
@@ -17,7 +17,7 @@ define i32 @test1(i32 %X) nounwind {
}
define i32 @test2(i32 %X) nounwind {
-; CHECK: test2
+; CHECK-LABEL: test2
; CHECK: revsh r0, r0
%tmp1 = lshr i32 %X, 8
%tmp1.upgrd.1 = trunc i32 %tmp1 to i16
@@ -58,7 +58,7 @@ entry:
; rdar://9609059
define i32 @test5(i32 %i) nounwind readnone {
entry:
-; CHECK: test5
+; CHECK-LABEL: test5
; CHECK: revsh r0, r0
%shl = shl i32 %i, 24
%shr = ashr exact i32 %shl, 16
@@ -71,7 +71,7 @@ entry:
; rdar://9609108
define i32 @test6(i32 %x) nounwind readnone {
entry:
-; CHECK: test6
+; CHECK-LABEL: test6
; CHECK: rev16 r0, r0
%and = shl i32 %x, 8
%shl = and i32 %and, 65280
@@ -88,7 +88,7 @@ entry:
; rdar://9164521
define i32 @test7(i32 %a) nounwind readnone {
entry:
-; CHECK: test7
+; CHECK-LABEL: test7
; CHECK: rev r0, r0
; CHECK: lsr r0, r0, #16
%and = lshr i32 %a, 8
@@ -101,7 +101,7 @@ entry:
define i32 @test8(i32 %a) nounwind readnone {
entry:
-; CHECK: test8
+; CHECK-LABEL: test8
; CHECK: revsh r0, r0
%and = lshr i32 %a, 8
%shr4 = and i32 %and, 255
@@ -115,7 +115,7 @@ entry:
; rdar://10750814
define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
entry:
-; CHECK: test9
+; CHECK-LABEL: test9
; CHECK: rev16 r0, r0
%conv = zext i16 %v to i32
%shr4 = lshr i32 %conv, 8
diff --git a/test/CodeGen/ARM/select_const.ll b/test/CodeGen/ARM/select_const.ll
new file mode 100644
index 0000000000000..48fe572bf8a72
--- /dev/null
+++ b/test/CodeGen/ARM/select_const.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi-unknown-unknown | FileCheck %s
+
+; Select of constants: control flow / conditional moves can always be replaced by logic+math (but may not be worth it?).
+; Test the zeroext/signext variants of each pattern to see if that makes a difference.
+
+; select Cond, 0, 1 --> zext (!Cond)
+
+define i32 @select_0_or_1(i1 %cond) {
+; CHECK-LABEL: select_0_or_1:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #1
+; CHECK-NEXT: bic r0, r1, r0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 0, i32 1
+ ret i32 %sel
+}
+
+define i32 @select_0_or_1_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_0_or_1_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: eor r0, r0, #1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 0, i32 1
+ ret i32 %sel
+}
+
+define i32 @select_0_or_1_signext(i1 signext %cond) {
+; CHECK-LABEL: select_0_or_1_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #1
+; CHECK-NEXT: bic r0, r1, r0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 0, i32 1
+ ret i32 %sel
+}
+
+; select Cond, 1, 0 --> zext (Cond)
+
+define i32 @select_1_or_0(i1 %cond) {
+; CHECK-LABEL: select_1_or_0:
+; CHECK: @ BB#0:
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 1, i32 0
+ ret i32 %sel
+}
+
+define i32 @select_1_or_0_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_1_or_0_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 1, i32 0
+ ret i32 %sel
+}
+
+define i32 @select_1_or_0_signext(i1 signext %cond) {
+; CHECK-LABEL: select_1_or_0_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 1, i32 0
+ ret i32 %sel
+}
+
+; select Cond, 0, -1 --> sext (!Cond)
+
+define i32 @select_0_or_neg1(i1 %cond) {
+; CHECK-LABEL: select_0_or_neg1:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #1
+; CHECK-NEXT: bic r0, r1, r0
+; CHECK-NEXT: rsb r0, r0, #0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 0, i32 -1
+ ret i32 %sel
+}
+
+define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_0_or_neg1_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: eor r0, r0, #1
+; CHECK-NEXT: rsb r0, r0, #0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 0, i32 -1
+ ret i32 %sel
+}
+
+define i32 @select_0_or_neg1_signext(i1 signext %cond) {
+; CHECK-LABEL: select_0_or_neg1_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mvn r0, r0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 0, i32 -1
+ ret i32 %sel
+}
+
+define i32 @select_0_or_neg1_alt(i1 %cond) {
+; CHECK-LABEL: select_0_or_neg1_alt:
+; CHECK: @ BB#0:
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: sub r0, r0, #1
+; CHECK-NEXT: mov pc, lr
+ %z = zext i1 %cond to i32
+ %add = add i32 %z, -1
+ ret i32 %add
+}
+
+define i32 @select_0_or_neg1_alt_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_0_or_neg1_alt_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: sub r0, r0, #1
+; CHECK-NEXT: mov pc, lr
+ %z = zext i1 %cond to i32
+ %add = add i32 %z, -1
+ ret i32 %add
+}
+
+define i32 @select_0_or_neg1_alt_signext(i1 signext %cond) {
+; CHECK-LABEL: select_0_or_neg1_alt_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mvn r0, r0
+; CHECK-NEXT: mov pc, lr
+ %z = zext i1 %cond to i32
+ %add = add i32 %z, -1
+ ret i32 %add
+}
+
+; select Cond, -1, 0 --> sext (Cond)
+
+define i32 @select_neg1_or_0(i1 %cond) {
+; CHECK-LABEL: select_neg1_or_0:
+; CHECK: @ BB#0:
+; CHECK-NEXT: and r0, r0, #1
+; CHECK-NEXT: rsb r0, r0, #0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 -1, i32 0
+ ret i32 %sel
+}
+
+define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_neg1_or_0_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: rsb r0, r0, #0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 -1, i32 0
+ ret i32 %sel
+}
+
+define i32 @select_neg1_or_0_signext(i1 signext %cond) {
+; CHECK-LABEL: select_neg1_or_0_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 -1, i32 0
+ ret i32 %sel
+}
+
+; select Cond, C+1, C --> add (zext Cond), C
+
+define i32 @select_Cplus1_C(i1 %cond) {
+; CHECK-LABEL: select_Cplus1_C:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #41
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: movne r1, #42
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 42, i32 41
+ ret i32 %sel
+}
+
+define i32 @select_Cplus1_C_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_Cplus1_C_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #41
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movne r1, #42
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 42, i32 41
+ ret i32 %sel
+}
+
+define i32 @select_Cplus1_C_signext(i1 signext %cond) {
+; CHECK-LABEL: select_Cplus1_C_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #41
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: movne r1, #42
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 42, i32 41
+ ret i32 %sel
+}
+
+; select Cond, C, C+1 --> add (sext Cond), C
+
+define i32 @select_C_Cplus1(i1 %cond) {
+; CHECK-LABEL: select_C_Cplus1:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #42
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: movne r1, #41
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 41, i32 42
+ ret i32 %sel
+}
+
+define i32 @select_C_Cplus1_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_C_Cplus1_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #42
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movne r1, #41
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 41, i32 42
+ ret i32 %sel
+}
+
+define i32 @select_C_Cplus1_signext(i1 signext %cond) {
+; CHECK-LABEL: select_C_Cplus1_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #42
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: movne r1, #41
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 41, i32 42
+ ret i32 %sel
+}
+
+; In general, select of 2 constants could be:
+; select Cond, C1, C2 --> add (mul (zext Cond), C1-C2), C2 --> add (and (sext Cond), C1-C2), C2
+
+define i32 @select_C1_C2(i1 %cond) {
+; CHECK-LABEL: select_C1_C2:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #165
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: orr r1, r1, #256
+; CHECK-NEXT: moveq r1, #42
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 421, i32 42
+ ret i32 %sel
+}
+
+define i32 @select_C1_C2_zeroext(i1 zeroext %cond) {
+; CHECK-LABEL: select_C1_C2_zeroext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #165
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: orr r1, r1, #256
+; CHECK-NEXT: moveq r1, #42
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 421, i32 42
+ ret i32 %sel
+}
+
+define i32 @select_C1_C2_signext(i1 signext %cond) {
+; CHECK-LABEL: select_C1_C2_signext:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #165
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: orr r1, r1, #256
+; CHECK-NEXT: moveq r1, #42
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i32 421, i32 42
+ ret i32 %sel
+}
+
+; 4295032833 = 0x100010001.
+; This becomes an opaque constant via ConstantHoisting, so we don't fold it into the select.
+
+define i64 @opaque_constant1(i1 %cond, i64 %x) {
+; CHECK-LABEL: opaque_constant1:
+; CHECK: @ BB#0:
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: ands r12, r0, #1
+; CHECK-NEXT: mov lr, #1
+; CHECK-NEXT: mov r0, #23
+; CHECK-NEXT: eor r3, r3, #1
+; CHECK-NEXT: orr lr, lr, #65536
+; CHECK-NEXT: mvnne r0, #3
+; CHECK-NEXT: movne r12, #1
+; CHECK-NEXT: and r4, r0, lr
+; CHECK-NEXT: eor r2, r2, lr
+; CHECK-NEXT: subs r0, r4, #1
+; CHECK-NEXT: sbc r1, r12, #0
+; CHECK-NEXT: orrs r2, r2, r3
+; CHECK-NEXT: movne r0, r4
+; CHECK-NEXT: movne r1, r12
+; CHECK-NEXT: pop {r4, lr}
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i64 -4, i64 23
+ %bo = and i64 %sel, 4295032833 ; 0x100010001
+ %cmp = icmp eq i64 %x, 4295032833
+ %sext = sext i1 %cmp to i64
+ %add = add i64 %bo, %sext
+ ret i64 %add
+}
+
+; 65537 == 0x10001.
+; This becomes an opaque constant via ConstantHoisting, so we don't fold it into the select.
+
+define i64 @opaque_constant2(i1 %cond, i64 %x) {
+; CHECK-LABEL: opaque_constant2:
+; CHECK: @ BB#0:
+; CHECK-NEXT: mov r1, #1
+; CHECK-NEXT: tst r0, #1
+; CHECK-NEXT: orr r1, r1, #65536
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: moveq r0, #23
+; CHECK-NEXT: and r0, r0, r1
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov pc, lr
+ %sel = select i1 %cond, i64 65537, i64 23
+ %bo = and i64 %sel, 65537
+ ret i64 %bo
+}
+
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8c1502e146550..09e8ed4bc096a 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -223,21 +223,19 @@ entry:
ret i32 %add
}
-; Do not fold the xor into the select
+; Fold the xor into the select.
define i32 @t15(i32 %p) {
entry:
; ARM-LABEL: t15:
-; ARM: mov [[REG:r[0-9]+]], #2
+; ARM: mov [[REG:r[0-9]+]], #3
; ARM: cmp r0, #8
-; ARM: movwgt [[REG:r[0-9]+]], #1
-; ARM: eor r0, [[REG:r[0-9]+]], #1
+; ARM: movwgt [[REG:r[0-9]+]], #0
; T2-LABEL: t15:
-; T2: movs [[REG:r[0-9]+]], #2
+; T2: movs [[REG:r[0-9]+]], #3
; T2: cmp [[REG:r[0-9]+]], #8
; T2: it gt
-; T2: movgt [[REG:r[0-9]+]], #1
-; T2: eor r0, [[REG:r[0-9]+]], #1
+; T2: movgt [[REG:r[0-9]+]], #0
%cmp = icmp sgt i32 %p, 8
%a = select i1 %cmp, i32 1, i32 2
%xor = xor i32 %a, 1
diff --git a/test/CodeGen/ARM/setcc-logic.ll b/test/CodeGen/ARM/setcc-logic.ll
new file mode 100644
index 0000000000000..79bae1facb3e5
--- /dev/null
+++ b/test/CodeGen/ARM/setcc-logic.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s
+
+define zeroext i1 @ne_neg1_and_ne_zero(i32 %x) nounwind {
+; CHECK-LABEL: ne_neg1_and_ne_zero:
+; CHECK: @ BB#0:
+; CHECK-NEXT: add r1, r0, #1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: cmp r1, #1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: bx lr
+ %cmp1 = icmp ne i32 %x, -1
+ %cmp2 = icmp ne i32 %x, 0
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
+
+define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; CHECK-LABEL: and_eq:
+; CHECK: @ BB#0:
+; CHECK-NEXT: eor r2, r2, r3
+; CHECK-NEXT: eor r0, r0, r1
+; CHECK-NEXT: orrs r0, r0, r2
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movweq r0, #1
+; CHECK-NEXT: bx lr
+ %cmp1 = icmp eq i32 %a, %b
+ %cmp2 = icmp eq i32 %c, %d
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; CHECK-LABEL: or_ne:
+; CHECK: @ BB#0:
+; CHECK-NEXT: eor r2, r2, r3
+; CHECK-NEXT: eor r0, r0, r1
+; CHECK-NEXT: orrs r0, r0, r2
+; CHECK-NEXT: movwne r0, #1
+; CHECK-NEXT: bx lr
+ %cmp1 = icmp ne i32 %a, %b
+ %cmp2 = icmp ne i32 %c, %d
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind {
+; CHECK-LABEL: and_eq_vec:
+; CHECK: @ BB#0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: vmov d19, r2, r3
+; CHECK-NEXT: add r12, sp, #40
+; CHECK-NEXT: add lr, sp, #8
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vld1.64 {d16, d17}, [lr]
+; CHECK-NEXT: add r0, sp, #24
+; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
+; CHECK-NEXT: vceq.i32 q8, q9, q8
+; CHECK-NEXT: vld1.64 {d22, d23}, [r0]
+; CHECK-NEXT: vceq.i32 q9, q11, q10
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vmovn.i32 d17, q9
+; CHECK-NEXT: vand d16, d16, d17
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r11, pc}
+ %cmp1 = icmp eq <4 x i32> %a, %b
+ %cmp2 = icmp eq <4 x i32> %c, %d
+ %and = and <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %and
+}
+
diff --git a/test/CodeGen/ARM/setcc-sentinals.ll b/test/CodeGen/ARM/setcc-sentinals.ll
deleted file mode 100644
index dc45e0e13881d..0000000000000
--- a/test/CodeGen/ARM/setcc-sentinals.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -asm-verbose=false %s -o - | FileCheck %s
-
-define zeroext i1 @test0(i32 %x) nounwind {
-; CHECK-LABEL: test0:
-; CHECK: add [[REG:(r[0-9]+)|(lr)]], r0, #1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: cmp [[REG]], #1
-; CHECK-NEXT: movwhi r0, #1
-; CHECK-NEXT: bx lr
- %cmp1 = icmp ne i32 %x, -1
- %not.cmp = icmp ne i32 %x, 0
- %.cmp1 = and i1 %cmp1, %not.cmp
- ret i1 %.cmp1
-}
diff --git a/test/CodeGen/ARM/single-issue-r52.mir b/test/CodeGen/ARM/single-issue-r52.mir
new file mode 100644
index 0000000000000..6c95f7603e6e0
--- /dev/null
+++ b/test/CodeGen/ARM/single-issue-r52.mir
@@ -0,0 +1,86 @@
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP
+# REQUIRES: asserts
+--- |
+ ; ModuleID = 'foo.ll'
+ source_filename = "foo.ll"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "arm---eabi"
+
+ %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+ ; Function Attrs: nounwind
+ define <8 x i8> @foo(i8* %A) {
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 1
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+ ret <8 x i8> %tmp4
+ }
+ declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32)
+
+# CHECK: ********** MI Scheduling **********
+# CHECK: ScheduleDAGMILive::schedule starting
+# CHECK: SU(1): %vreg1<def> = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0
+# CHECK: Latency : 8
+# CHECK: Single Issue : true;
+# CHECK: SU(2): %vreg4<def> = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1
+# CHECK: Latency : 5
+# CHECK: Single Issue : false;
+# CHECK: SU(3): %vreg5<def>, %vreg6<def> = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4
+# CHECK: Latency : 4
+# CHECK: Single Issue : false;
+
+# TOPDOWN: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo
+# TOPDOWN: Bump cycle to end group
+# TOPDOWN: Scheduling SU(2) %vreg4<def> = VADDv8i8
+
+# BOTTOMUP: Scheduling SU(2) %vreg4<def> = VADDv8i8
+# BOTTOMUP: Scheduling SU(1) %vreg1<def> = VLD4d8Pseudo
+# BOTTOMUP: Bump cycle to begin group
+
+...
+---
+name: foo
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr }
+ - { id: 1, class: qqpr }
+ - { id: 2, class: dpr }
+ - { id: 3, class: dpr }
+ - { id: 4, class: dpr }
+ - { id: 5, class: gpr }
+ - { id: 6, class: gpr }
+liveins:
+ - { reg: '%r0', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %r0
+
+ %0 = COPY %r0
+ %1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8)
+ %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _
+ %5, %6 = VMOVRRD %4, 14, _
+ %r0 = COPY %5
+ %r1 = COPY %6
+ BX_RET 14, _, implicit %r0, implicit killed %r1
+
+...
diff --git a/test/CodeGen/ARM/sjljeh-swifterror.ll b/test/CodeGen/ARM/sjljeh-swifterror.ll
new file mode 100644
index 0000000000000..aae0e75c98afb
--- /dev/null
+++ b/test/CodeGen/ARM/sjljeh-swifterror.ll
@@ -0,0 +1,27 @@
+; RUN: opt -sjljehprepare -verify -S < %s | FileCheck %s
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "armv7s-apple-ios7.0"
+
+%swift.error = type opaque
+
+declare void @objc_msgSend() local_unnamed_addr
+
+declare i32 @__objc_personality_v0(...)
+
+; Make sure we don't leave a select on a swifterror argument.
+; CHECK-LABEL: @test
+; CHECK-NOT: select true, %0
+define swiftcc void @test(%swift.error** swifterror) local_unnamed_addr personality i32 (...)* @__objc_personality_v0 {
+entry:
+ %call28.i = invoke i32 bitcast (void ()* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+ to label %invoke.cont.i unwind label %lpad.i
+
+invoke.cont.i:
+ unreachable
+
+lpad.i:
+ %1 = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } undef
+}
+
diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll
index aa093192f2b22..4788644cf1958 100644
--- a/test/CodeGen/ARM/smml.ll
+++ b/test/CodeGen/ARM/smml.ll
@@ -1,20 +1,15 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
-; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6
-; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7
-; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV6T2
-; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV7
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V4
+; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6
+; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMB
+; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6
+; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6T2
+; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6T2
define i32 @Test0(i32 %a, i32 %b, i32 %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: Test0
; CHECK-NOT: smmls
-; CHECK-V6-NOT: smmls
-; CHECK-V7-NOT: smmls
-; CHECK_THUMB-NOT: smmls
-; CHECK-THUMBV6T2-NOT: smmls
-; CHECK-THUMBV7-NOT: smmls
%conv4 = zext i32 %a to i64
%conv1 = sext i32 %b to i64
%conv2 = sext i32 %c to i64
@@ -27,12 +22,11 @@ entry:
define i32 @Test1(i32 %a, i32 %b, i32 %c) {
;CHECK-LABEL: Test1
-;CHECK-NOT: smmls
+;CHECK-V4-NOT: smmls
;CHECK-THUMB-NOT: smmls
+;CHECK-THUMBV6-NOT: smmls
;CHECK-V6: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0
-;CHECK-V7: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0
;CHECK-THUMBV6T2: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0
-;CHECK-THUMBV7: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0
entry:
%conv = sext i32 %b to i64
%conv1 = sext i32 %c to i64
@@ -47,10 +41,21 @@ entry:
declare void @opaque(i32)
define void @test_used_flags(i32 %in1, i32 %in2) {
-; CHECK-V7-LABEL: test_used_flags:
-; CHECK-V7: smull [[PROD_LO:r[0-9]+]], [[PROD_HI:r[0-9]+]], r0, r1
-; CHECK-V7: rsbs {{.*}}, [[PROD_LO]], #0
-; CHECK-V7: rscs {{.*}}, [[PROD_HI]], #0
+; CHECK-LABEL: test_used_flags:
+; CHECK-THUMB: cmp r1, #0
+; CHECK-THUMB: push {r2}
+; CHECK-THUMB: pop {r3}
+; CHECK-THUMB: ble
+; CHECK-THUMBV6: cmp r1, #0
+; CHECK-THUMBV6: mov r3, r2
+; CHECK-THUMBV6: ble
+; CHECK-V6: smull [[PROD_LO:r[0-9]+]], [[PROD_HI:r[0-9]+]], r0, r1
+; CHECK-V6: rsbs {{.*}}, [[PROD_LO]], #0
+; CHECK-V6: rscs {{.*}}, [[PROD_HI]], #0
+; CHECK-THUMBV6T2: smull [[PROD_LO:r[0-9]+]], [[PROD_HI:r[0-9]+]], r0, r1
+; CHECK-THUMBV6T2: movs [[ZERO:r[0-9]+]], #0
+; CHECK-THUMBV6T2: rsbs {{.*}}, [[PROD_LO]], #0
+; CHECK-THUMBV6T2: sbcs.w {{.*}}, [[ZERO]], [[PROD_HI]]
%in1.64 = sext i32 %in1 to i64
%in2.64 = sext i32 %in2 to i64
%mul = mul nsw i64 %in1.64, %in2.64
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index 3c187aa846d54..2b7be41ddb24e 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -262,3 +262,32 @@ define i32 @f21(i32 %a, i32 %x, i16 %y) {
%tmp5 = add i32 %a, %tmp4
ret i32 %tmp5
}
+
+@global_b = external global i16, align 2
+
+define i32 @f22(i32 %a) {
+; CHECK-LABEL: f22:
+; CHECK: smulwb r0, r0, r1
+; CHECK-THUMBV6-NOT: smulwb
+ %b = load i16, i16* @global_b, align 2
+ %sext = sext i16 %b to i64
+ %conv = sext i32 %a to i64
+ %mul = mul nsw i64 %sext, %conv
+ %shr37 = lshr i64 %mul, 16
+ %conv4 = trunc i64 %shr37 to i32
+ ret i32 %conv4
+}
+
+define i32 @f23(i32 %a, i32 %c) {
+; CHECK-LABEL: f23:
+; CHECK: smlawb r0, r0, r2, r1
+; CHECK-THUMBV6-NOT: smlawb
+ %b = load i16, i16* @global_b, align 2
+ %sext = sext i16 %b to i64
+ %conv = sext i32 %a to i64
+ %mul = mul nsw i64 %sext, %conv
+ %shr49 = lshr i64 %mul, 16
+ %conv5 = trunc i64 %shr49 to i32
+ %add = add nsw i32 %conv5, %c
+ ret i32 %add
+}
diff --git a/test/CodeGen/ARM/softfp-fabs-fneg.ll b/test/CodeGen/ARM/softfp-fabs-fneg.ll
index b608fb840218a..b7c684d35b571 100644
--- a/test/CodeGen/ARM/softfp-fabs-fneg.ll
+++ b/test/CodeGen/ARM/softfp-fabs-fneg.ll
@@ -14,8 +14,7 @@ define double @f(double %a) {
define float @g(float %a) {
; CHECK-LABEL: g:
- ; CHECK-THUMB: bic r0, r0, #-2147483648
- ; CHECK-ARM: bfc r0, #31, #1
+ ; CHECK: bic r0, r0, #-2147483648
; CHECK-NEXT: bx lr
%x = call float @llvm.fabs.f32(float %a) readnone
ret float %x
diff --git a/test/CodeGen/ARM/special-reg-mcore.ll b/test/CodeGen/ARM/special-reg-mcore.ll
index 45e6db9e78fe1..1ecf8dc77a701 100644
--- a/test/CodeGen/ARM/special-reg-mcore.ll
+++ b/test/CodeGen/ARM/special-reg-mcore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 2>&1 | FileCheck %s --check-prefix=MCORE
+; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m4 --show-mc-encoding 2>&1 | FileCheck %s --check-prefix=MCORE
; RUN: not llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-m3 2>&1 | FileCheck %s --check-prefix=M3CORE
; RUN: not llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s --check-prefix=ACORE
@@ -8,20 +8,20 @@
define i32 @read_mclass_registers() nounwind {
entry:
; MCORE-LABEL: read_mclass_registers:
- ; MCORE: mrs r0, apsr
- ; MCORE: mrs r1, iapsr
- ; MCORE: mrs r1, eapsr
- ; MCORE: mrs r1, xpsr
- ; MCORE: mrs r1, ipsr
- ; MCORE: mrs r1, epsr
- ; MCORE: mrs r1, iepsr
- ; MCORE: mrs r1, msp
- ; MCORE: mrs r1, psp
- ; MCORE: mrs r1, primask
- ; MCORE: mrs r1, basepri
- ; MCORE: mrs r1, basepri_max
- ; MCORE: mrs r1, faultmask
- ; MCORE: mrs r1, control
+ ; MCORE: mrs r0, apsr @ encoding: [0xef,0xf3,0x00,0x80]
+ ; MCORE: mrs r1, iapsr @ encoding: [0xef,0xf3,0x01,0x81]
+ ; MCORE: mrs r1, eapsr @ encoding: [0xef,0xf3,0x02,0x81]
+ ; MCORE: mrs r1, xpsr @ encoding: [0xef,0xf3,0x03,0x81]
+ ; MCORE: mrs r1, ipsr @ encoding: [0xef,0xf3,0x05,0x81]
+ ; MCORE: mrs r1, epsr @ encoding: [0xef,0xf3,0x06,0x81]
+ ; MCORE: mrs r1, iepsr @ encoding: [0xef,0xf3,0x07,0x81]
+ ; MCORE: mrs r1, msp @ encoding: [0xef,0xf3,0x08,0x81]
+ ; MCORE: mrs r1, psp @ encoding: [0xef,0xf3,0x09,0x81]
+ ; MCORE: mrs r1, primask @ encoding: [0xef,0xf3,0x10,0x81]
+ ; MCORE: mrs r1, basepri @ encoding: [0xef,0xf3,0x11,0x81]
+ ; MCORE: mrs r1, basepri_max @ encoding: [0xef,0xf3,0x12,0x81]
+ ; MCORE: mrs r1, faultmask @ encoding: [0xef,0xf3,0x13,0x81]
+ ; MCORE: mrs r1, control @ encoding: [0xef,0xf3,0x14,0x81]
%0 = call i32 @llvm.read_register.i32(metadata !0)
%1 = call i32 @llvm.read_register.i32(metadata !4)
@@ -56,32 +56,32 @@ entry:
define void @write_mclass_registers(i32 %x) nounwind {
entry:
; MCORE-LABEL: write_mclass_registers:
- ; MCORE: msr apsr_nzcvqg, r0
- ; MCORE: msr apsr_nzcvq, r0
- ; MCORE: msr apsr_g, r0
- ; MCORE: msr apsr_nzcvqg, r0
- ; MCORE: msr iapsr_nzcvqg, r0
- ; MCORE: msr iapsr_nzcvq, r0
- ; MCORE: msr iapsr_g, r0
- ; MCORE: msr iapsr_nzcvqg, r0
- ; MCORE: msr eapsr_nzcvqg, r0
- ; MCORE: msr eapsr_nzcvq, r0
- ; MCORE: msr eapsr_g, r0
- ; MCORE: msr eapsr_nzcvqg, r0
- ; MCORE: msr xpsr_nzcvqg, r0
- ; MCORE: msr xpsr_nzcvq, r0
- ; MCORE: msr xpsr_g, r0
- ; MCORE: msr xpsr_nzcvqg, r0
- ; MCORE: msr ipsr, r0
- ; MCORE: msr epsr, r0
- ; MCORE: msr iepsr, r0
- ; MCORE: msr msp, r0
- ; MCORE: msr psp, r0
- ; MCORE: msr primask, r0
- ; MCORE: msr basepri, r0
- ; MCORE: msr basepri_max, r0
- ; MCORE: msr faultmask, r0
- ; MCORE: msr control, r0
+ ; MCORE: msr apsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x00,0x88]
+ ; MCORE: msr apsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x00,0x88]
+ ; MCORE: msr apsr_g, r0 @ encoding: [0x80,0xf3,0x00,0x84]
+ ; MCORE: msr apsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x00,0x8c]
+ ; MCORE: msr iapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x01,0x88]
+ ; MCORE: msr iapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x01,0x88]
+ ; MCORE: msr iapsr_g, r0 @ encoding: [0x80,0xf3,0x01,0x84]
+ ; MCORE: msr iapsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x01,0x8c]
+ ; MCORE: msr eapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x02,0x88]
+ ; MCORE: msr eapsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x02,0x88]
+ ; MCORE: msr eapsr_g, r0 @ encoding: [0x80,0xf3,0x02,0x84]
+ ; MCORE: msr eapsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x02,0x8c]
+ ; MCORE: msr xpsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x03,0x88]
+ ; MCORE: msr xpsr_nzcvq, r0 @ encoding: [0x80,0xf3,0x03,0x88]
+ ; MCORE: msr xpsr_g, r0 @ encoding: [0x80,0xf3,0x03,0x84]
+ ; MCORE: msr xpsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x03,0x8c]
+ ; MCORE: msr ipsr, r0 @ encoding: [0x80,0xf3,0x05,0x88]
+ ; MCORE: msr epsr, r0 @ encoding: [0x80,0xf3,0x06,0x88]
+ ; MCORE: msr iepsr, r0 @ encoding: [0x80,0xf3,0x07,0x88]
+ ; MCORE: msr msp, r0 @ encoding: [0x80,0xf3,0x08,0x88]
+ ; MCORE: msr psp, r0 @ encoding: [0x80,0xf3,0x09,0x88]
+ ; MCORE: msr primask, r0 @ encoding: [0x80,0xf3,0x10,0x88]
+ ; MCORE: msr basepri, r0 @ encoding: [0x80,0xf3,0x11,0x88]
+ ; MCORE: msr basepri_max, r0 @ encoding: [0x80,0xf3,0x12,0x88]
+ ; MCORE: msr faultmask, r0 @ encoding: [0x80,0xf3,0x13,0x88]
+ ; MCORE: msr control, r0 @ encoding: [0x80,0xf3,0x14,0x88]
call void @llvm.write_register.i32(metadata !0, i32 %x)
call void @llvm.write_register.i32(metadata !1, i32 %x)
diff --git a/test/CodeGen/ARM/special-reg-v8m-main.ll b/test/CodeGen/ARM/special-reg-v8m-main.ll
index cde296c6b218f..ea9c01487d854 100644
--- a/test/CodeGen/ARM/special-reg-v8m-main.ll
+++ b/test/CodeGen/ARM/special-reg-v8m-main.ll
@@ -90,19 +90,19 @@ entry:
define void @write_mclass_registers(i32 %x) nounwind {
entry:
; MAINLINE-LABEL: write_mclass_registers:
- ; MAINLINE: msr apsr_nzcvqg, r0
+ ; MAINLINE: msr apsr_nzcvq, r0
; MAINLINE: msr apsr_nzcvq, r0
; MAINLINE: msr apsr_g, r0
; MAINLINE: msr apsr_nzcvqg, r0
- ; MAINLINE: msr iapsr_nzcvqg, r0
+ ; MAINLINE: msr iapsr_nzcvq, r0
; MAINLINE: msr iapsr_nzcvq, r0
; MAINLINE: msr iapsr_g, r0
; MAINLINE: msr iapsr_nzcvqg, r0
- ; MAINLINE: msr eapsr_nzcvqg, r0
+ ; MAINLINE: msr eapsr_nzcvq, r0
; MAINLINE: msr eapsr_nzcvq, r0
; MAINLINE: msr eapsr_g, r0
; MAINLINE: msr eapsr_nzcvqg, r0
- ; MAINLINE: msr xpsr_nzcvqg, r0
+ ; MAINLINE: msr xpsr_nzcvq, r0
; MAINLINE: msr xpsr_nzcvq, r0
; MAINLINE: msr xpsr_g, r0
; MAINLINE: msr xpsr_nzcvqg, r0
diff --git a/test/CodeGen/ARM/stack_guard_remat.ll b/test/CodeGen/ARM/stack_guard_remat.ll
index 99d4994984506..9b5677608d266 100644
--- a/test/CodeGen/ARM/stack_guard_remat.ll
+++ b/test/CodeGen/ARM/stack_guard_remat.ll
@@ -51,20 +51,20 @@
define i32 @test_stack_guard_remat() #0 {
%a1 = alloca [256 x i32], align 4
%1 = bitcast [256 x i32]* %a1 to i8*
- call void @llvm.lifetime.start(i64 1024, i8* %1)
+ call void @llvm.lifetime.start.p0i8(i64 1024, i8* %1)
%2 = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i32 0, i32 0
call void @foo3(i32* %2) #3
call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
- call void @llvm.lifetime.end(i64 1024, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1024, i8* %1)
ret i32 0
}
; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @foo3(i32*)
; Function Attrs: nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/static-addr-hoisting.ll b/test/CodeGen/ARM/static-addr-hoisting.ll
index 3d47e02f965e8..683d607936b85 100644
--- a/test/CodeGen/ARM/static-addr-hoisting.ll
+++ b/test/CodeGen/ARM/static-addr-hoisting.ll
@@ -6,9 +6,9 @@ define void @multiple_store() {
; CHECK: movs [[VAL:r[0-9]+]], #42
; CHECK: movt r[[BASE1]], #15
-; CHECK: str [[VAL]], [r[[BASE1]]]
-; CHECK: str [[VAL]], [r[[BASE1]], #24]
-; CHECK: str.w [[VAL]], [r[[BASE1]], #42]
+; CHECK-DAG: str [[VAL]], [r[[BASE1]]]
+; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24]
+; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42]
; CHECK: movw r[[BASE2:[0-9]+]], #20394
; CHECK: movt r[[BASE2]], #18
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
index 37e9a4af3be59..475b80b3bb070 100644
--- a/test/CodeGen/ARM/tail-opts.ll
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -65,3 +65,55 @@ altret:
call void @far(i32 1001)
ret void
}
+
+; Use alternating abort functions so that the blocks we wish to merge are not
+; layout successors during branch folding.
+
+; CHECK-LABEL: merge_alternating_aborts:
+; CHECK-NOT: _abort
+; CHECK-NOT: _alt_abort
+; CHECK: bxne lr
+; CHECK-NOT: _abort
+; CHECK-NOT: _alt_abort
+; CHECK: LBB{{.*}}:
+; CHECK: mov lr, pc
+; CHECK: b _alt_abort
+; CHECK-NOT: _abort
+; CHECK-NOT: _alt_abort
+; CHECK: LBB{{.*}}:
+; CHECK: mov lr, pc
+; CHECK: b _abort
+; CHECK-NOT: _abort
+; CHECK-NOT: _alt_abort
+
+declare void @abort()
+declare void @alt_abort()
+
+define void @merge_alternating_aborts() {
+entry:
+ %c1 = call i1 @qux()
+ br i1 %c1, label %cont1, label %abort1
+abort1:
+ call void @abort()
+ unreachable
+cont1:
+ %c2 = call i1 @qux()
+ br i1 %c2, label %cont2, label %abort2
+abort2:
+ call void @alt_abort()
+ unreachable
+cont2:
+ %c3 = call i1 @qux()
+ br i1 %c3, label %cont3, label %abort3
+abort3:
+ call void @abort()
+ unreachable
+cont3:
+ %c4 = call i1 @qux()
+ br i1 %c4, label %cont4, label %abort4
+abort4:
+ call void @alt_abort()
+ unreachable
+cont4:
+ ret void
+}
diff --git a/test/CodeGen/ARM/thumb1-div.ll b/test/CodeGen/ARM/thumb1-div.ll
new file mode 100644
index 0000000000000..844dfe6f963c1
--- /dev/null
+++ b/test/CodeGen/ARM/thumb1-div.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-m23 -march=thumb | \
+; RUN: FileCheck %s -check-prefix=CHECK
+
+define i32 @f1(i32 %a, i32 %b) {
+entry:
+; CHECK-LABEL: f1
+
+; CHECK: sdiv
+ %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+entry:
+; CHECK-LABEL: f2
+; CHECK: udiv
+ %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @f3(i32 %a, i32 %b) {
+entry:
+; CHECK-LABEL: f3
+
+
+ %tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+; CHECK: sdiv
+; CHECK-NEXT: muls
+; CHECK-NEXT: subs
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+entry:
+; CHECK-LABEL: f4
+
+; CHECK: udiv
+; CHECK-NEXT: muls
+; CHECK-NEXT: subs
+ %tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+
+define i64 @f5(i64 %a, i64 %b) {
+entry:
+; CHECK-LABEL: f5
+
+; EABI MODE = Remainder in R2-R3, quotient in R0-R1
+; CHECK: __aeabi_ldivmod
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+ %tmp1 = srem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
+
+define i64 @f6(i64 %a, i64 %b) {
+entry:
+; CHECK-LABEL: f6
+
+; EABI MODE = Remainder in R2-R3, quotient in R0-R1
+; CHECK: __aeabi_uldivmod
+; CHECK: mov r0, r2
+; CHECK: mov r1, r3
+ %tmp1 = urem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
diff --git a/test/CodeGen/ARM/unschedule-first-call.ll b/test/CodeGen/ARM/unschedule-first-call.ll
new file mode 100644
index 0000000000000..4a218afcc5e13
--- /dev/null
+++ b/test/CodeGen/ARM/unschedule-first-call.ll
@@ -0,0 +1,136 @@
+; RUN: llc < %s
+; PR30911
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv6kz--linux-gnueabihf"
+
+; Function Attrs: nounwind
+define void @dradbg(i32, i32, float*, float*, float*, float*, float*) #0 {
+ br i1 undef, label %.critedge, label %8
+
+.critedge: ; preds = %7
+ %.mux2 = select i1 undef, i1 undef, i1 true
+ br label %8
+
+; <label>:8: ; preds = %.critedge, %7
+ %9 = getelementptr float, float* %3, i64 undef
+ %10 = ptrtoint float* %9 to i32
+ %11 = icmp ule i32 %10, undef
+ %12 = getelementptr float, float* %5, i64 undef
+ %13 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
+ %14 = extractvalue { i64, i1 } %13, 0
+ %15 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %14, i64 1)
+ %16 = extractvalue { i64, i1 } %15, 0
+ %17 = icmp slt i64 1, %16
+ %18 = select i1 %17, i64 1, i64 %16
+ %19 = sext i32 %1 to i64
+ %20 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %18, i64 %19)
+ %21 = extractvalue { i64, i1 } %20, 0
+ %22 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %21, i64 0)
+ %23 = extractvalue { i64, i1 } %22, 0
+ %24 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %23, i64 undef)
+ %25 = extractvalue { i64, i1 } %24, 0
+ %26 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %25, i64 0)
+ %27 = extractvalue { i64, i1 } %26, 0
+ %28 = getelementptr float, float* %3, i64 %27
+ %29 = ptrtoint float* %12 to i32
+ %30 = ptrtoint float* %28 to i32
+ %31 = icmp ule i32 %29, %30
+ %32 = or i1 %11, %31
+ %33 = and i1 false, %32
+ %34 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 undef)
+ %35 = extractvalue { i64, i1 } %34, 0
+ %36 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %35, i64 1)
+ %37 = extractvalue { i64, i1 } %36, 0
+ %38 = icmp slt i64 1, %37
+ %39 = select i1 %38, i64 1, i64 %37
+ %40 = sext i32 %1 to i64
+ %41 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %39, i64 %40)
+ %42 = extractvalue { i64, i1 } %41, 0
+ %43 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %42, i64 0)
+ %44 = extractvalue { i64, i1 } %43, 0
+ %45 = sext i32 %0 to i64
+ %46 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %44, i64 %45)
+ %47 = extractvalue { i64, i1 } %46, 0
+ %48 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %47, i64 0)
+ %49 = extractvalue { i64, i1 } %48, 0
+ %50 = getelementptr float, float* %5, i64 %49
+ %51 = ptrtoint float* %50 to i32
+ %52 = icmp ule i32 undef, %51
+ %53 = getelementptr float, float* %4, i64 undef
+ %54 = ptrtoint float* %53 to i32
+ %55 = icmp ule i32 undef, %54
+ %56 = or i1 %52, %55
+ %57 = and i1 %33, %56
+ %58 = getelementptr float, float* %2, i64 undef
+ %59 = ptrtoint float* %58 to i32
+ %60 = icmp ule i32 %59, undef
+ %61 = select i1 undef, i64 undef, i64 0
+ %62 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %61, i64 undef)
+ %63 = extractvalue { i64, i1 } %62, 0
+ %64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 1)
+ %65 = extractvalue { i64, i1 } %64, 0
+ %66 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %63, i64 %65)
+ %67 = extractvalue { i64, i1 } %66, 0
+ %68 = sext i32 %0 to i64
+ %69 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %67, i64 %68)
+ %70 = extractvalue { i64, i1 } %69, 0
+ %71 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %70, i64 0)
+ %72 = extractvalue { i64, i1 } %71, 0
+ %73 = getelementptr float, float* %5, i64 %72
+ %74 = ptrtoint float* %73 to i32
+ %75 = icmp ule i32 %74, undef
+ %76 = or i1 %60, %75
+ %77 = and i1 %57, %76
+ %78 = getelementptr float, float* %6, i64 undef
+ %79 = ptrtoint float* %78 to i32
+ %80 = icmp ule i32 %79, undef
+ %81 = getelementptr float, float* %5, i64 undef
+ %82 = ptrtoint float* %81 to i32
+ %83 = icmp ule i32 %82, undef
+ %84 = or i1 %80, %83
+ %85 = and i1 %77, %84
+ %86 = and i1 %85, undef
+ %87 = and i1 %86, undef
+ %88 = and i1 %87, undef
+ %89 = and i1 %88, undef
+ %90 = and i1 %89, undef
+ %91 = and i1 %90, undef
+ %92 = and i1 %91, undef
+ %93 = and i1 %92, undef
+ %94 = and i1 %93, undef
+ %95 = and i1 %94, undef
+ br i1 %95, label %97, label %96
+
+; <label>:96: ; preds = %8
+ br i1 undef, label %.critedge122, label %.critedge110
+
+.critedge122: ; preds = %.critedge122, %96
+ br i1 false, label %.critedge122, label %.critedge110
+
+.critedge110: ; preds = %.critedge219, %97, %.critedge122, %96
+ ret void
+
+; <label>:97: ; preds = %8
+ br i1 undef, label %.critedge219, label %.critedge110
+
+.critedge219: ; preds = %.critedge219, %97
+ %.pr287 = phi i1 [ undef, %.critedge219 ], [ true, %97 ]
+ br i1 %.pr287, label %.critedge219, label %.critedge110
+}
+
+; Function Attrs: nounwind readnone
+declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) #1
+
+; Function Attrs: nounwind readnone
+declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #1
+
+; Function Attrs: nounwind readnone
+declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="arm1176jzf-s" "target-features"="+dsp,+strict-align,+vfp2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 4.0.0 (trunk 285923) (llvm/trunk 285921)"}
diff --git a/test/CodeGen/ARM/v6-jumptable-clobber.mir b/test/CodeGen/ARM/v6-jumptable-clobber.mir
new file mode 100644
index 0000000000000..0e9bc42565f3b
--- /dev/null
+++ b/test/CodeGen/ARM/v6-jumptable-clobber.mir
@@ -0,0 +1,384 @@
+# RUN: llc -run-pass=arm-cp-islands -o - %s | FileCheck %s
+
+# Test created by tweaking the register allocation after stopping the IR below
+# just before constant islands. We were forwarding the table index to the end of
+# the block, even though the LEA clobbered it.
+
+# CHECK-LABEL: name: foo
+# CHECK: tBR_JT
+ # This order is important. If the jump-table comes first then the
+ # transformation is valid because the LEA can be removed, see second test.
+# CHECK: CONSTPOOL_ENTRY
+# CHECK: JUMPTABLE_ADDRS
+
+# CHECK-LABEL: name: bar
+# CHECK: tTBB_JT %pc, killed %r1
+
+--- |
+ ; ModuleID = 'simple.ll'
+ source_filename = "simple.ll"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv6m-none--eabi"
+
+ define void @foo(i8 %in, i32* %addr) {
+ store i32 12345678, i32* %addr
+ %1 = call i32 @llvm.arm.space(i32 980, i32 undef)
+ %2 = zext i8 %in to i32
+ switch i32 %2, label %default [
+ i32 0, label %d1
+ i32 1, label %d2
+ i32 3, label %d3
+ i32 4, label %d4
+ i32 5, label %d5
+ i32 6, label %d6
+ i32 7, label %d7
+ i32 2, label %d8
+ i32 8, label %d9
+ i32 9, label %d10
+ i32 19, label %d11
+ i32 20, label %d12
+ i32 21, label %d13
+ i32 22, label %d14
+ i32 24, label %d15
+ i32 25, label %d16
+ i32 26, label %d17
+ ]
+
+ default: ; preds = %0
+ unreachable
+
+ d1: ; preds = %0
+ unreachable
+
+ d2: ; preds = %0
+ unreachable
+
+ d3: ; preds = %0
+ unreachable
+
+ d4: ; preds = %0
+ unreachable
+
+ d5: ; preds = %0
+ unreachable
+
+ d6: ; preds = %0
+ unreachable
+
+ d7: ; preds = %0
+ unreachable
+
+ d8: ; preds = %0
+ unreachable
+
+ d9: ; preds = %0
+ unreachable
+
+ d10: ; preds = %0
+ unreachable
+
+ d11: ; preds = %0
+ unreachable
+
+ d12: ; preds = %0
+ unreachable
+
+ d13: ; preds = %0
+ unreachable
+
+ d14: ; preds = %0
+ unreachable
+
+ d15: ; preds = %0
+ unreachable
+
+ d16: ; preds = %0
+ unreachable
+
+ d17: ; preds = %0
+ unreachable
+ }
+
+ define void @bar(i8 %in, i32* %addr) {
+ store i32 12345678, i32* %addr
+ %1 = zext i8 %in to i32
+ switch i32 %1, label %default [
+ i32 0, label %d1
+ i32 1, label %d2
+ i32 3, label %d3
+ i32 4, label %d4
+ i32 5, label %d5
+ i32 6, label %d6
+ i32 7, label %d7
+ i32 2, label %d8
+ i32 8, label %d9
+ i32 9, label %d10
+ i32 19, label %d11
+ i32 20, label %d12
+ i32 21, label %d13
+ i32 22, label %d14
+ i32 24, label %d15
+ i32 25, label %d16
+ i32 26, label %d17
+ ]
+
+ default: ; preds = %0
+ unreachable
+
+ d1: ; preds = %0
+ unreachable
+
+ d2: ; preds = %0
+ unreachable
+
+ d3: ; preds = %0
+ unreachable
+
+ d4: ; preds = %0
+ unreachable
+
+ d5: ; preds = %0
+ unreachable
+
+ d6: ; preds = %0
+ unreachable
+
+ d7: ; preds = %0
+ unreachable
+
+ d8: ; preds = %0
+ unreachable
+
+ d9: ; preds = %0
+ unreachable
+
+ d10: ; preds = %0
+ unreachable
+
+ d11: ; preds = %0
+ unreachable
+
+ d12: ; preds = %0
+ unreachable
+
+ d13: ; preds = %0
+ unreachable
+
+ d14: ; preds = %0
+ unreachable
+
+ d15: ; preds = %0
+ unreachable
+
+ d16: ; preds = %0
+ unreachable
+
+ d17: ; preds = %0
+ unreachable
+ }
+
+ ; Function Attrs: nounwind
+ declare i32 @llvm.arm.space(i32, i32) #0
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #0
+
+ attributes #0 = { nounwind }
+
+...
+---
+name: foo
+alignment: 1
+exposesReturnsTwice: false
+noVRegs: true
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+ - { reg: '%r1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+constants:
+ - id: 0
+ value: i32 12345678
+ alignment: 4
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.d2', '%bb.9.d8', '%bb.4.d3', '%bb.5.d4',
+ '%bb.6.d5', '%bb.7.d6', '%bb.8.d7', '%bb.10.d9',
+ '%bb.11.d10', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1',
+ '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1',
+ '%bb.2.d1', '%bb.2.d1', '%bb.12.d11', '%bb.13.d12',
+ '%bb.14.d13', '%bb.15.d14', '%bb.2.d1', '%bb.16.d15',
+ '%bb.17.d16', '%bb.18.d17' ]
+body: |
+ bb.0 (%ir-block.0):
+ successors: %bb.2.d1(0x03c3c3c4), %bb.1(0x7c3c3c3c)
+ liveins: %r0, %r1
+
+ %r2 = tLDRpci %const.0, 14, _
+ tSTRi killed %r2, killed %r1, 0, 14, _ :: (store 4 into %ir.addr)
+ dead %r1 = SPACE 980, undef %r0
+ %r0 = tUXTB killed %r0, 14, _
+ %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, _
+ tCMPi8 %r1, 25, 14, _, implicit-def %cpsr
+ tBcc %bb.2.d1, 8, killed %cpsr
+
+ bb.1 (%ir-block.0):
+ successors: %bb.3.d2(0x07c549d2), %bb.9.d8(0x07c549d2), %bb.4.d3(0x07c549d2), %bb.5.d4(0x07c549d2), %bb.6.d5(0x07c549d2), %bb.7.d6(0x07c549d2), %bb.8.d7(0x07c549d2), %bb.10.d9(0x07c549d2), %bb.11.d10(0x07c549d2), %bb.2.d1(0x03ab62db), %bb.12.d11(0x07c549d2), %bb.13.d12(0x07c549d2), %bb.14.d13(0x07c549d2), %bb.15.d14(0x07c549d2), %bb.16.d15(0x07c549d2), %bb.17.d16(0x07c549d2), %bb.18.d17(0x07c549d2)
+ liveins: %r1
+
+ %r0, dead %cpsr = tLSLri killed %r1, 2, 14, _
+ %r1 = tLEApcrelJT %jump-table.0, 14, _
+ %r0 = tLDRr killed %r0, killed %r1, 14, _ :: (load 4 from jump-table)
+ tBR_JTr killed %r0, %jump-table.0
+
+ bb.3.d2:
+
+ bb.9.d8:
+
+ bb.4.d3:
+
+ bb.5.d4:
+
+ bb.6.d5:
+
+ bb.7.d6:
+
+ bb.8.d7:
+
+ bb.10.d9:
+
+ bb.11.d10:
+
+ bb.2.d1:
+
+ bb.12.d11:
+
+ bb.13.d12:
+
+ bb.14.d13:
+
+ bb.15.d14:
+
+ bb.16.d15:
+
+ bb.17.d16:
+
+ bb.18.d17:
+
+...
+
+---
+name: bar
+alignment: 1
+exposesReturnsTwice: false
+noVRegs: true
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+ - { reg: '%r1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+constants:
+ - id: 0
+ value: i32 12345678
+ alignment: 4
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.d2', '%bb.9.d8', '%bb.4.d3', '%bb.5.d4',
+ '%bb.6.d5', '%bb.7.d6', '%bb.8.d7', '%bb.10.d9',
+ '%bb.11.d10', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1',
+ '%bb.2.d1', '%bb.2.d1', '%bb.2.d1', '%bb.2.d1',
+ '%bb.2.d1', '%bb.2.d1', '%bb.12.d11', '%bb.13.d12',
+ '%bb.14.d13', '%bb.15.d14', '%bb.2.d1', '%bb.16.d15',
+ '%bb.17.d16', '%bb.18.d17' ]
+body: |
+ bb.0 (%ir-block.0):
+ successors: %bb.2.d1(0x03c3c3c4), %bb.1(0x7c3c3c3c)
+ liveins: %r0, %r1
+
+ %r2 = tLDRpci %const.0, 14, _
+ tSTRi killed %r2, killed %r1, 0, 14, _ :: (store 4 into %ir.addr)
+ %r0 = tUXTB killed %r0, 14, _
+ %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, _
+ tCMPi8 %r1, 25, 14, _, implicit-def %cpsr
+ tBcc %bb.2.d1, 8, killed %cpsr
+
+ bb.1 (%ir-block.0):
+ successors: %bb.3.d2(0x07c549d2), %bb.9.d8(0x07c549d2), %bb.4.d3(0x07c549d2), %bb.5.d4(0x07c549d2), %bb.6.d5(0x07c549d2), %bb.7.d6(0x07c549d2), %bb.8.d7(0x07c549d2), %bb.10.d9(0x07c549d2), %bb.11.d10(0x07c549d2), %bb.2.d1(0x03ab62db), %bb.12.d11(0x07c549d2), %bb.13.d12(0x07c549d2), %bb.14.d13(0x07c549d2), %bb.15.d14(0x07c549d2), %bb.16.d15(0x07c549d2), %bb.17.d16(0x07c549d2), %bb.18.d17(0x07c549d2)
+ liveins: %r1
+
+ %r0, dead %cpsr = tLSLri killed %r1, 2, 14, _
+ %r1 = tLEApcrelJT %jump-table.0, 14, _
+ %r0 = tLDRr killed %r0, killed %r1, 14, _ :: (load 4 from jump-table)
+ tBR_JTr killed %r0, %jump-table.0
+
+ bb.3.d2:
+
+ bb.9.d8:
+
+ bb.4.d3:
+
+ bb.5.d4:
+
+ bb.6.d5:
+
+ bb.7.d6:
+
+ bb.8.d7:
+
+ bb.10.d9:
+
+ bb.11.d10:
+
+ bb.2.d1:
+
+ bb.12.d11:
+
+ bb.13.d12:
+
+ bb.14.d13:
+
+ bb.15.d14:
+
+ bb.16.d15:
+
+ bb.17.d16:
+
+ bb.18.d17:
+
+...
diff --git a/test/CodeGen/ARM/v8m-tail-call.ll b/test/CodeGen/ARM/v8m-tail-call.ll
new file mode 100644
index 0000000000000..2c2c795838ff4
--- /dev/null
+++ b/test/CodeGen/ARM/v8m-tail-call.ll
@@ -0,0 +1,23 @@
+; RUN: llc %s -o - -mtriple=thumbv8m.base | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: test:
+entry:
+ %call = tail call i32 @foo()
+ %tail = tail call i32 @foo()
+ ret void
+; CHECK: bl foo
+; CHECK: bl foo
+; CHECK-NOT: b foo
+}
+
+define void @test2() {
+; CHECK-LABEL: test2:
+entry:
+ %tail = tail call i32 @foo()
+ ret void
+; CHECK: b foo
+; CHECK-NOT: bl foo
+}
+
+declare i32 @foo()
diff --git a/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll b/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll
new file mode 100644
index 0000000000000..673e04687a10e
--- /dev/null
+++ b/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll
@@ -0,0 +1,51 @@
+; RUN: llc -filetype=obj -o /dev/null < %s
+; RUN: llc -filetype=asm < %s | FileCheck %s
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+source_filename = "bugpoint-output-39ed676.bc"
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.base-arm-none-eabi"
+
+@crc32_tab = external unnamed_addr global [256 x i32], align 4
+@g_566 = external global i32**, align 4
+
+define void @main() {
+entry:
+ %0 = load volatile i32**, i32*** @g_566, align 4
+ br label %func_16.exit.i.i.i
+
+lbl_1394.i.i.i.loopexit: ; preds = %for.cond14.preheader.us.i.i.i
+ unreachable
+
+func_16.exit.i.i.i: ; preds = %entry
+ br i1 undef, label %for.cond7.preheader.i.lr.ph.i.i, label %for.end476.i.i.i.loopexit
+
+for.cond7.preheader.i.lr.ph.i.i: ; preds = %func_16.exit.i.i.i
+ br i1 undef, label %for.end476.i.i.i.loopexit, label %for.cond7.preheader.i.i.preheader.i
+
+for.cond7.preheader.i.i.preheader.i: ; preds = %for.cond7.preheader.i.lr.ph.i.i
+ br label %for.cond14.preheader.us.i.i.i
+
+for.cond7.preheader.i.us.i.i: ; preds = %for.cond7.preheader.i.lr.ph.i.i
+ unreachable
+
+for.cond14.preheader.us.i.i.i: ; preds = %for.inc459.us.i.i.i, %for.cond7.preheader.i.i.preheader.i
+; CHECK: @ BB#4
+; CHECK-NEXT: .p2align 2
+ switch i4 undef, label %func_1.exit.loopexit [
+ i4 0, label %for.inc459.us.i.i.i
+ i4 -5, label %for.inc459.us.i.i.i
+ i4 2, label %lbl_1394.i.i.i.loopexit
+ i4 3, label %for.end476.i.i.i.loopexit
+ ]
+
+for.inc459.us.i.i.i: ; preds = %for.cond14.preheader.us.i.i.i, %for.cond14.preheader.us.i.i.i
+ br label %for.cond14.preheader.us.i.i.i
+
+for.end476.i.i.i.loopexit: ; preds = %for.cond14.preheader.us.i.i.i
+ unreachable
+
+func_1.exit.loopexit: ; preds = %for.cond14.preheader.us.i.i.i
+ %arrayidx.i63.i.i5252 = getelementptr inbounds [256 x i32], [256 x i32]* @crc32_tab, i32 0, i32 undef
+ unreachable
+}
diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll
index d901a7461fc86..57470694b124b 100644
--- a/test/CodeGen/ARM/va_arg.ll
+++ b/test/CodeGen/ARM/va_arg.ll
@@ -4,8 +4,8 @@
; CHECK-LABEL: test1:
; CHECK-NOT: bfc
; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
-; CHECK: bfc [[REG]], #0, #3
-; CHECK-NOT: bfc
+; CHECK: bic {{(r[0-9]+)|(lr)}}, [[REG]], #7
+; CHECK-NOT: bic
define i64 @test1(i32 %i, ...) nounwind optsize {
entry:
@@ -20,8 +20,8 @@ entry:
; CHECK-LABEL: test2:
; CHECK-NOT: bfc
; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
-; CHECK: bfc [[REG]], #0, #3
-; CHECK-NOT: bfc
+; CHECK: bic {{(r[0-9]+)|(lr)}}, [[REG]], #7
+; CHECK-NOT: bic
; CHECK: bx lr
define double @test2(i32 %a, i32* %b, ...) nounwind optsize {
diff --git a/test/CodeGen/ARM/vcmp-crash.ll b/test/CodeGen/ARM/vcmp-crash.ll
new file mode 100644
index 0000000000000..2d3262be5849b
--- /dev/null
+++ b/test/CodeGen/ARM/vcmp-crash.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mcpu=cortex-m4 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7em-none--eabi"
+
+; CHECK: vcmp.f32
+define double @f(double %a, double %b, double %c, float %d) {
+ %1 = fcmp oeq float %d, 0.0
+ %2 = select i1 %1, double %a, double %c
+ ret double %2
+}
diff --git a/test/CodeGen/ARM/vldm-liveness.ll b/test/CodeGen/ARM/vldm-liveness.ll
index e114e6970a324..63dc9d61ebcca 100644
--- a/test/CodeGen/ARM/vldm-liveness.ll
+++ b/test/CodeGen/ARM/vldm-liveness.ll
@@ -1,26 +1,13 @@
; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
-; ARM load store optimizer was dealing with a sequence like:
-; s1 = VLDRS [r0, 1], Q0<imp-def>
-; s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
-; s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
-; s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
+; Make sure we emit the loads in ascending order, and form a vldmia.
;
-; It decided to combine the {s0, s1} loads into a single instruction in the
-; third position. However, this leaves the instruction defining s3 with a stray
-; imp-use of Q0, which is undefined.
-;
-; The verifier catches this, so this test just makes sure that appropriate
-; liveness flags are added.
-;
-; I believe the change will be tested as long as the vldmia is not the first of
-; the loads. Earlier optimisations may perturb the output over time, but
-; fiddling the indices should be sufficient to restore the test.
+; See vldm-liveness.mir for the bug this file was originally testing.
define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
; CHECK-LABEL: foo:
-; CHECK: vldr s3, [r0, #8]
; CHECK: vldmia r0, {s0, s1}
+; CHECK: vldr s3, [r0, #8]
; CHECK: vldr s2, [r0, #16]
%off0 = getelementptr float, float* %ptr, i32 0
%val0 = load float, float* %off0
diff --git a/test/CodeGen/ARM/vldm-liveness.mir b/test/CodeGen/ARM/vldm-liveness.mir
new file mode 100644
index 0000000000000..a85a018a8b1a5
--- /dev/null
+++ b/test/CodeGen/ARM/vldm-liveness.mir
@@ -0,0 +1,40 @@
+# RUN: llc -run-pass arm-ldst-opt -verify-machineinstrs %s -o - | FileCheck %s
+# ARM load store optimizer was dealing with a sequence like:
+# s1 = VLDRS [r0, 1], Q0<imp-def>
+# s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
+# s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
+# s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
+#
+# It decided to combine the {s0, s1} loads into a single instruction in the
+# third position. However, this leaves the instruction defining s3 with a stray
+# imp-use of Q0, which is undefined.
+#
+# The verifier catches this, so this test just makes sure that appropriate
+# liveness flags are added.
+--- |
+ target triple = "thumbv7-apple-ios"
+ define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
+ ret <4 x float> undef
+ }
+...
+---
+name: foo
+alignment: 1
+liveins:
+ - { reg: '%r0' }
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %r0
+
+ %s1 = VLDRS %r0, 1, 14, _, implicit-def %q0 :: (load 4)
+ %s3 = VLDRS %r0, 2, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4)
+ ; CHECK: %s3 = VLDRS %r0, 2, 14, _, implicit killed undef %q0, implicit-def %q0 :: (load 4)
+
+ %s0 = VLDRS %r0, 0, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4)
+ ; CHECK: VLDMSIA %r0, 14, _, def %s0, def %s1, implicit-def _
+
+ %s2 = VLDRS killed %r0, 4, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4)
+ ; CHECK: %s2 = VLDRS killed %r0, 4, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4)
+
+ tBX_RET 14, _, implicit %q0
+...
diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll
index 746b1b000ef10..daea41399b47c 100644
--- a/test/CodeGen/ARM/vsel.ll
+++ b/test/CodeGen/ARM/vsel.ll
@@ -132,7 +132,7 @@ define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) {
%tst1 = fcmp oeq float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f32 s0, s2, s3
ret void
}
@@ -141,7 +141,7 @@ define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) {
%tst1 = fcmp oeq float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f64 d16, d1, d2
ret void
}
@@ -276,7 +276,7 @@ define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) {
%tst1 = fcmp une float %lhs32, %rhs32
%val1 = select i1 %tst1, float %a, float %b
store float %val1, float* @varfloat
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f32 s0, s3, s2
ret void
}
@@ -285,7 +285,7 @@ define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) {
%tst1 = fcmp une float %lhs32, %rhs32
%val1 = select i1 %tst1, double %a, double %b
store double %val1, double* @vardouble
-; CHECK: vcmpe.f32 s0, s1
+; CHECK: vcmp.f32 s0, s1
; CHECK: vseleq.f64 d16, d2, d1
ret void
}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index a83a4df5490c3..0a5235df319fe 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -318,33 +318,29 @@ entry:
ret void
}
-define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
+define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
-; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.
-; CHECK-LABEL: vuzp_trunc:
+; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8.
+; CHECK-LABEL: cmpsel_trunc:
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
-; CHECK-NEXT: add r12, sp, #48
-; CHECK-NEXT: add lr, sp, #16
; CHECK-NEXT: add r4, sp, #64
; CHECK-NEXT: add r5, sp, #32
+; CHECK-NEXT: add r12, sp, #48
+; CHECK-NEXT: add lr, sp, #16
; CHECK-NEXT: vld1.64 {d16, d17}, [r5]
; CHECK-NEXT: vld1.64 {d18, d19}, [r4]
; CHECK-NEXT: vld1.64 {d20, d21}, [lr]
; CHECK-NEXT: vld1.64 {d22, d23}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vcgt.u32 q9, q11, q10
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vmovn.i32 d17, q9
-; CHECK-NEXT: vmov.i8 d18, #0x7
-; CHECK-NEXT: vmov d19, r0, r1
-; CHECK-NEXT: vuzp.8 d17, d16
-; CHECK-NEXT: vneg.s8 d16, d18
-; CHECK-NEXT: vshl.i8 d17, d17, #7
+; CHECK-NEXT: vmovn.i32 d17, q8
+; CHECK-NEXT: vmovn.i32 d16, q9
; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vshl.s8 d16, d17, d16
+; CHECK-NEXT: vmov d19, r0, r1
+; CHECK-NEXT: vmovn.i16 d16, q8
; CHECK-NEXT: vbsl d16, d19, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r4, r5, r11, lr}