aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/AArch64
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-07-23 20:41:05 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-07-23 20:41:05 +0000
commit01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch)
tree4def12e759965de927d963ac65840d663ef9d1ea /test/CodeGen/AArch64
parentf0f4822ed4b66e3579e92a89f368f8fb860e218e (diff)
Notes
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r--test/CodeGen/AArch64/128bit_load_store.ll2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll63
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir329
-rw-r--r--test/CodeGen/AArch64/a57-csel.ll5
-rw-r--r--test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll5
-rw-r--r--test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll43
-rw-r--r--test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll18
-rw-r--r--test/CodeGen/AArch64/aarch64-be-bv.ll198
-rw-r--r--test/CodeGen/AArch64/aarch64-deferred-spilling.ll514
-rw-r--r--test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll223
-rw-r--r--test/CodeGen/AArch64/aarch64-gep-opt.ll3
-rw-r--r--test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll86
-rw-r--r--test/CodeGen/AArch64/aarch64-interleaved-accesses.ll12
-rw-r--r--test/CodeGen/AArch64/aarch64-smull.ll4
-rw-r--r--test/CodeGen/AArch64/aarch64-stp-cluster.ll149
-rw-r--r--test/CodeGen/AArch64/aarch64-tbz.ll98
-rw-r--r--test/CodeGen/AArch64/aarch64-tryBitfieldInsertOpFromOr-crash.ll36
-rw-r--r--test/CodeGen/AArch64/addsub.ll8
-rw-r--r--test/CodeGen/AArch64/alloca.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-aapcs-be.ll3
-rw-r--r--test/CodeGen/AArch64/arm64-abi-varargs.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-abi.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-abi_align.ll32
-rw-r--r--test/CodeGen/AArch64/arm64-addrmode.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-atomic-128.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-atomic.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-bitfield-extract.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-build-vector.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-builtins-linux.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp.ll249
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh.ll22
-rw-r--r--test/CodeGen/AArch64/arm64-const-addr.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-convert-v4f64.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-csldst-mmo.ll25
-rw-r--r--test/CodeGen/AArch64/arm64-detect-vec-redux.ll52
-rw-r--r--test/CodeGen/AArch64/arm64-extern-weak.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-extract.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-alloca.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-call.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-gv.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-fcopysign.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-fma-combines.ll136
-rw-r--r--test/CodeGen/AArch64/arm64-fml-combines.ll128
-rw-r--r--test/CodeGen/AArch64/arm64-fp128.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-hello.ll21
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-join-reserved.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-ldp-cluster.ll150
-rw-r--r--test/CodeGen/AArch64/arm64-memcpy-inline.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-misched-basic-A53.ll1
-rw-r--r--test/CodeGen/AArch64/arm64-misched-memdep-bug.ll3
-rw-r--r--test/CodeGen/AArch64/arm64-misched-multimmo.ll23
-rw-r--r--test/CodeGen/AArch64/arm64-movi.ll98
-rw-r--r--test/CodeGen/AArch64/arm64-mul.ll62
-rw-r--r--test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll94
-rw-r--r--test/CodeGen/AArch64/arm64-neon-2velem-high.ll36
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copy.ll41
-rw-r--r--test/CodeGen/AArch64/arm64-nvcast.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll58
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint.ll19
-rw-r--r--test/CodeGen/AArch64/arm64-register-pairing.ll31
-rw-r--r--test/CodeGen/AArch64/arm64-regress-opt-cmp.mir42
-rw-r--r--test/CodeGen/AArch64/arm64-rev.ll27
-rw-r--r--test/CodeGen/AArch64/arm64-shrink-wrapping.ll70
-rw-r--r--test/CodeGen/AArch64/arm64-stp-aa.ll34
-rw-r--r--test/CodeGen/AArch64/arm64-stp.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-this-return.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamic-together.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-variadic-aapcs.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-vclz.ll28
-rw-r--r--test/CodeGen/AArch64/arm64-vecCmpBr.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-vector-ext.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-vector-imm.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-virtual_base.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-vshift.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-vshuffle.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll72
-rw-r--r--test/CodeGen/AArch64/atomic-ops.ll2
-rw-r--r--test/CodeGen/AArch64/bitfield-extract.ll98
-rw-r--r--test/CodeGen/AArch64/bitfield-insert.ll245
-rw-r--r--test/CodeGen/AArch64/bitfield.ll2
-rw-r--r--test/CodeGen/AArch64/bitreverse.ll17
-rw-r--r--test/CodeGen/AArch64/branch-folder-merge-mmos.ll2
-rw-r--r--test/CodeGen/AArch64/bswap-known-bits.ll44
-rw-r--r--test/CodeGen/AArch64/cmpxchg-O0.ll75
-rw-r--r--test/CodeGen/AArch64/combine-comparisons-by-cse.ll44
-rw-r--r--test/CodeGen/AArch64/complex-int-to-fp.ll2
-rw-r--r--test/CodeGen/AArch64/cond-sel.ll40
-rw-r--r--test/CodeGen/AArch64/cpus.ll3
-rw-r--r--test/CodeGen/AArch64/cxx-tlscc.ll127
-rw-r--r--test/CodeGen/AArch64/dag-combine-invaraints.ll2
-rw-r--r--test/CodeGen/AArch64/directcond.ll2
-rw-r--r--test/CodeGen/AArch64/div_minsize.ll45
-rw-r--r--test/CodeGen/AArch64/emutls.ll121
-rw-r--r--test/CodeGen/AArch64/emutls_generic.ll9
-rw-r--r--test/CodeGen/AArch64/extern-weak.ll15
-rw-r--r--test/CodeGen/AArch64/f16-instructions.ll38
-rw-r--r--test/CodeGen/AArch64/fast-isel-branch-cond-split.ll4
-rw-r--r--test/CodeGen/AArch64/fast-isel-cmp-vec.ll12
-rw-r--r--test/CodeGen/AArch64/fast-isel-gep.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-tbz.ll4
-rw-r--r--test/CodeGen/AArch64/fastcc.ll147
-rw-r--r--test/CodeGen/AArch64/fcvt-int.ll25
-rw-r--r--test/CodeGen/AArch64/fcvt_combine.ll8
-rw-r--r--test/CodeGen/AArch64/fdiv-combine.ll30
-rw-r--r--test/CodeGen/AArch64/fdiv_combine.ll4
-rw-r--r--test/CodeGen/AArch64/fp-cond-sel.ll6
-rw-r--r--test/CodeGen/AArch64/fp16-v4-instructions.ll133
-rw-r--r--test/CodeGen/AArch64/fp16-vector-nvcast.ll12
-rw-r--r--test/CodeGen/AArch64/fpimm.ll13
-rw-r--r--test/CodeGen/AArch64/func-argpassing.ll2
-rw-r--r--test/CodeGen/AArch64/func-calls.ll14
-rw-r--r--test/CodeGen/AArch64/gep-nullptr.ll23
-rw-r--r--test/CodeGen/AArch64/global-merge-3.ll4
-rw-r--r--test/CodeGen/AArch64/global-merge-group-by-use.ll4
-rw-r--r--test/CodeGen/AArch64/half.ll12
-rw-r--r--test/CodeGen/AArch64/hints.ll2
-rw-r--r--test/CodeGen/AArch64/inlineasm-X-allocation.ll17
-rw-r--r--test/CodeGen/AArch64/inlineasm-X-constraint.ll152
-rw-r--r--test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll4
-rw-r--r--test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll125
-rw-r--r--test/CodeGen/AArch64/lit.local.cfg2
-rw-r--r--test/CodeGen/AArch64/local_vars.ll15
-rw-r--r--test/CodeGen/AArch64/logical-imm.ll4
-rw-r--r--test/CodeGen/AArch64/lower-range-metadata-func-call.ll44
-rw-r--r--test/CodeGen/AArch64/machine-combiner.ll2
-rw-r--r--test/CodeGen/AArch64/machine-copy-remove.ll94
-rw-r--r--test/CodeGen/AArch64/merge-store-dependency.ll63
-rw-r--r--test/CodeGen/AArch64/merge-store.ll13
-rw-r--r--test/CodeGen/AArch64/misched-fusion.ll10
-rw-r--r--test/CodeGen/AArch64/movimm-wzr.mir46
-rw-r--r--test/CodeGen/AArch64/movw-consts.ll10
-rw-r--r--test/CodeGen/AArch64/neg-imm.ll46
-rw-r--r--test/CodeGen/AArch64/neon-compare-instructions.ll57
-rw-r--r--test/CodeGen/AArch64/neon-mov.ll2
-rw-r--r--test/CodeGen/AArch64/neon-perm.ll2
-rw-r--r--test/CodeGen/AArch64/no-quad-ldp-stp.ll29
-rw-r--r--test/CodeGen/AArch64/nontemporal.ll6
-rw-r--r--test/CodeGen/AArch64/nzcv-save.ll2
-rw-r--r--test/CodeGen/AArch64/optimize-cond-branch.ll48
-rw-r--r--test/CodeGen/AArch64/pie.ll14
-rw-r--r--test/CodeGen/AArch64/preferred-alignment.ll28
-rw-r--r--test/CodeGen/AArch64/preserve_mostcc.ll40
-rw-r--r--test/CodeGen/AArch64/recp-fastmath.ll79
-rw-r--r--test/CodeGen/AArch64/regress-tblgen-chains.ll2
-rw-r--r--test/CodeGen/AArch64/rem_crash.ll257
-rw-r--r--test/CodeGen/AArch64/remat.ll10
-rw-r--r--test/CodeGen/AArch64/sibling-call.ll2
-rw-r--r--test/CodeGen/AArch64/special-reg.ll2
-rw-r--r--test/CodeGen/AArch64/sqrt-fastmath.ll160
-rw-r--r--test/CodeGen/AArch64/stack-guard-remat-bitcast.ll8
-rw-r--r--test/CodeGen/AArch64/stack-protector-target.ll19
-rw-r--r--test/CodeGen/AArch64/stackmap-frame-setup.ll4
-rw-r--r--test/CodeGen/AArch64/stackmap-liveness.ll2
-rw-r--r--test/CodeGen/AArch64/subs-to-sub-opt.ll23
-rw-r--r--test/CodeGen/AArch64/swifterror.ll385
-rw-r--r--test/CodeGen/AArch64/swiftself.ll67
-rw-r--r--test/CodeGen/AArch64/tailcall-ccmismatch.ll24
-rw-r--r--test/CodeGen/AArch64/tailcall-implicit-sret.ll2
-rw-r--r--test/CodeGen/AArch64/tailcall_misched_graph.ll6
-rw-r--r--test/CodeGen/AArch64/tailmerging_in_mbp.ll63
-rw-r--r--test/CodeGen/AArch64/vcvt-oversize.ll5
-rw-r--r--test/CodeGen/AArch64/vector-fcopysign.ll28
-rw-r--r--test/CodeGen/AArch64/vector_merge_dep_check.ll41
170 files changed, 5793 insertions, 1254 deletions
diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll
index 94fd386e0eaf..38d30dba4b8c 100644
--- a/test/CodeGen/AArch64/128bit_load_store.ll
+++ b/test/CodeGen/AArch64/128bit_load_store.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s
define void @test_store_f128(fp128* %ptr, fp128 %val) #0 {
; CHECK-LABEL: test_store_f128
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
new file mode 100644
index 000000000000..7d416d9b0add
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -0,0 +1,63 @@
+; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+; REQUIRES: global-isel
+; This file checks that the translation from llvm IR to generic MachineInstr
+; is correct.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-apple-ios"
+
+; Tests for add.
+; CHECK: name: addi64
+; CHECK: [[ARG1:%[0-9]+]](64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](64) = G_ADD i64 [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %x0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %x0
+define i64 @addi64(i64 %arg1, i64 %arg2) {
+ %res = add i64 %arg1, %arg2
+ ret i64 %res
+}
+
+; Tests for br.
+; CHECK: name: uncondbr
+; CHECK: body:
+;
+; Entry basic block.
+; CHECK: {{[0-9a-zA-Z._-]+}}:
+;
+; Make sure we have one successor and only one.
+; CHECK-NEXT: successors: %[[END:[0-9a-zA-Z._-]+]]({{0x[a-f0-9]+ / 0x[a-f0-9]+}} = 100.00%)
+;
+; Check that we emit the correct branch.
+; CHECK: G_BR label %[[END]]
+;
+; Check that end contains the return instruction.
+; CHECK: [[END]]:
+; CHECK-NEXT: RET_ReallyLR
+define void @uncondbr() {
+ br label %end
+end:
+ ret void
+}
+
+; Tests for or.
+; CHECK: name: ori64
+; CHECK: [[ARG1:%[0-9]+]](64) = COPY %x0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](64) = COPY %x1
+; CHECK-NEXT: [[RES:%[0-9]+]](64) = G_OR i64 [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %x0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %x0
+define i64 @ori64(i64 %arg1, i64 %arg2) {
+ %res = or i64 %arg1, %arg2
+ ret i64 %res
+}
+
+; CHECK: name: ori32
+; CHECK: [[ARG1:%[0-9]+]](32) = COPY %w0
+; CHECK-NEXT: [[ARG2:%[0-9]+]](32) = COPY %w1
+; CHECK-NEXT: [[RES:%[0-9]+]](32) = G_OR i32 [[ARG1]], [[ARG2]]
+; CHECK-NEXT: %w0 = COPY [[RES]]
+; CHECK-NEXT: RET_ReallyLR implicit %w0
+define i32 @ori32(i32 %arg1, i32 %arg2) {
+ %res = or i32 %arg1, %arg2
+ ret i32 %res
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
new file mode 100644
index 000000000000..f5d85e189d75
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
@@ -0,0 +1,329 @@
+# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -regbankselect-greedy -o - 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY
+# REQUIRES: global-isel
+
+--- |
+ ; ModuleID = 'generic-virtual-registers-type-error.mir'
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-apple-ios"
+ define void @defaultMapping() {
+ entry:
+ ret void
+ }
+ define void @defaultMappingVector() {
+ entry:
+ ret void
+ }
+ define void @defaultMapping1Repair() {
+ entry:
+ ret void
+ }
+ define void @defaultMapping2Repairs() {
+ entry:
+ ret void
+ }
+ define void @defaultMappingDefRepair() {
+ entry:
+ ret void
+ }
+ define void @phiPropagation(i32* %src, i32* %dst, i1 %cond) {
+ entry:
+ %srcVal = load i32, i32* %src
+ br i1 %cond, label %end, label %then
+ then:
+ %res = add i32 %srcVal, 36
+ br label %end
+ end:
+ %toStore = phi i32 [ %srcVal, %entry ], [ %res, %then ]
+ store i32 %toStore, i32* %dst
+ ret void
+ }
+ define void @defaultMappingUseRepairPhysReg() {
+ entry:
+ ret void
+ }
+ define void @defaultMappingDefRepairPhysReg() {
+ entry:
+ ret void
+ }
+ define void @greedyMappingOr() {
+ entry:
+ ret void
+ }
+ define void @greedyMappingOrWithConstraints() {
+ entry:
+ ret void
+ }
+...
+
+---
+# Check that we assign a relevant register bank for %0.
+# Based on the type i32, this should be gpr.
+name: defaultMapping
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %x0
+ ; CHECK: %0(32) = G_ADD i32 %x0
+ %0(32) = G_ADD i32 %x0, %x0
+...
+
+---
+# Check that we assign a relevant register bank for %0.
+# Based on the type <2 x i32>, this should be fpr.
+# FPR is used for both floating point and vector registers.
+name: defaultMappingVector
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: fpr }
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %d0
+ ; CHECK: %0(32) = G_ADD <2 x i32> %d0
+ %0(32) = G_ADD <2 x i32> %d0, %d0
+...
+
+---
+# Check that we repair the assignment for %0.
+# Indeed based on the source of the copy it should live
+# in FPR, but at the use, it should be GPR.
+name: defaultMapping1Repair
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: fpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 2, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %s0, %x0
+ ; CHECK: %0(32) = COPY %s0
+ ; CHECK-NEXT: %2(32) = COPY %0
+ ; CHECK-NEXT: %1(32) = G_ADD i32 %2, %x0
+ %0(32) = COPY %s0
+ %1(32) = G_ADD i32 %0, %x0
+...
+
+# Check that we repair the assignment for %0 differently for both uses.
+name: defaultMapping2Repairs
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: fpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 3, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %s0, %x0
+ ; CHECK: %0(32) = COPY %s0
+ ; CHECK-NEXT: %2(32) = COPY %0
+ ; CHECK-NEXT: %3(32) = COPY %0
+ ; CHECK-NEXT: %1(32) = G_ADD i32 %2, %3
+ %0(32) = COPY %s0
+ %1(32) = G_ADD i32 %0, %0
+...
+
+---
+# Check that we repair the definition of %1.
+# %1 is forced to be into FPR, but its definition actually
+# requires that it lives in GPR. Make sure regbankselect
+# fixes that.
+name: defaultMappingDefRepair
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 1, class: fpr }
+# CHECK-NEXT: - { id: 2, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: fpr }
+body: |
+ bb.0.entry:
+ liveins: %w0
+ ; CHECK: %0(32) = COPY %w0
+ ; CHECK-NEXT: %2(32) = G_ADD i32 %0, %w0
+ ; CHECK-NEXT: %1(32) = COPY %2
+ %0(32) = COPY %w0
+ %1(32) = G_ADD i32 %0, %w0
+...
+
+---
+# Check that we are able to propagate register banks from phis.
+name: phiPropagation
+isSSA: true
+tracksRegLiveness: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr32 }
+# CHECK-NEXT: - { id: 1, class: gpr64sp }
+# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 4, class: gpr }
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr64sp }
+ - { id: 2, class: gpr32 }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+body: |
+ bb.0.entry:
+ successors: %bb.2.end, %bb.1.then
+ liveins: %x0, %x1, %w2
+
+ %0 = LDRWui killed %x0, 0 :: (load 4 from %ir.src)
+ %1 = COPY %x1
+ %2 = COPY %w2
+ TBNZW killed %2, 0, %bb.2.end
+
+ bb.1.then:
+ successors: %bb.2.end
+ %3(32) = G_ADD i32 %0, %0
+
+ bb.2.end:
+ %4(32) = PHI %0, %bb.0.entry, %3, %bb.1.then
+ STRWui killed %4, killed %1, 0 :: (store 4 into %ir.dst)
+ RET_ReallyLR
+...
+
+---
+# Make sure we can repair physical register uses as well.
+name: defaultMappingUseRepairPhysReg
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 2, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %w0, %s0
+ ; CHECK: %0(32) = COPY %w0
+ ; CHECK-NEXT: %2(32) = COPY %s0
+ ; CHECK-NEXT: %1(32) = G_ADD i32 %0, %2
+ %0(32) = COPY %w0
+ %1(32) = G_ADD i32 %0, %s0
+...
+
+---
+# Make sure we can repair physical register defs.
+name: defaultMappingDefRepairPhysReg
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %w0
+ ; CHECK: %0(32) = COPY %w0
+ ; CHECK-NEXT: %1(32) = G_ADD i32 %0, %0
+ ; CHECK-NEXT: %s0 = COPY %1
+ %0(32) = COPY %w0
+ %s0 = G_ADD i32 %0, %0
+...
+
+---
+# Check that the greedy mode is able to switch the
+# G_OR instruction from fpr to gpr.
+name: greedyMappingOr
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+
+# Fast mode maps vector instruction on FPR.
+# FAST-NEXT: - { id: 2, class: fpr }
+# Fast mode needs two extra copies.
+# FAST-NEXT: - { id: 3, class: fpr }
+# FAST-NEXT: - { id: 4, class: fpr }
+
+# Greedy mode coalesce the computation on the GPR register
+# because it is the cheapest.
+# GREEDY-NEXT: - { id: 2, class: gpr }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %x0, %x1
+ ; CHECK: %0(64) = COPY %x0
+ ; CHECK-NEXT: %1(64) = COPY %x1
+
+
+ ; Fast mode tries to reuse the source of the copy for the destination.
+ ; Now, the default mapping says that %0 and %1 need to be in FPR.
+ ; The repairing code insert two copies to materialize that.
+ ; FAST-NEXT: %3(64) = COPY %0
+ ; FAST-NEXT: %4(64) = COPY %1
+ ; The mapping of G_OR is on FPR.
+ ; FAST-NEXT: %2(64) = G_OR <2 x i32> %3, %4
+
+ ; Greedy mode remapped the instruction on the GPR bank.
+ ; GREEDY-NEXT: %2(64) = G_OR <2 x i32> %0, %1
+ %0(64) = COPY %x0
+ %1(64) = COPY %x1
+ %2(64) = G_OR <2 x i32> %0, %1
+...
+
+---
+# Check that the greedy mode is able to switch the
+# G_OR instruction from fpr to gpr, while still honoring
+# %2 constraint.
+name: greedyMappingOrWithConstraints
+isSSA: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 2, class: fpr }
+
+# Fast mode maps vector instruction on FPR.
+# Fast mode needs two extra copies.
+# FAST-NEXT: - { id: 3, class: fpr }
+# FAST-NEXT: - { id: 4, class: fpr }
+
+# Greedy mode coalesce the computation on the GPR register because it
+# is the cheapest, but will need one extra copy to materialize %2 into a FPR.
+# GREEDY-NEXT: - { id: 3, class: gpr }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: fpr }
+body: |
+ bb.0.entry:
+ liveins: %x0, %x1
+ ; CHECK: %0(64) = COPY %x0
+ ; CHECK-NEXT: %1(64) = COPY %x1
+
+
+ ; Fast mode tries to reuse the source of the copy for the destination.
+ ; Now, the default mapping says that %0 and %1 need to be in FPR.
+ ; The repairing code insert two copies to materialize that.
+ ; FAST-NEXT: %3(64) = COPY %0
+ ; FAST-NEXT: %4(64) = COPY %1
+ ; The mapping of G_OR is on FPR.
+ ; FAST-NEXT: %2(64) = G_OR <2 x i32> %3, %4
+
+ ; Greedy mode remapped the instruction on the GPR bank.
+ ; GREEDY-NEXT: %3(64) = G_OR <2 x i32> %0, %1
+ ; We need to keep %2 into FPR because we do not know anything about it.
+ ; GREEDY-NEXT: %2(64) = COPY %3
+ %0(64) = COPY %x0
+ %1(64) = COPY %x1
+ %2(64) = G_OR <2 x i32> %0, %1
+...
diff --git a/test/CodeGen/AArch64/a57-csel.ll b/test/CodeGen/AArch64/a57-csel.ll
index f5496f777765..3c99a90fe28a 100644
--- a/test/CodeGen/AArch64/a57-csel.ll
+++ b/test/CodeGen/AArch64/a57-csel.ll
@@ -1,8 +1,9 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mcpu=cortex-a57 -aarch64-enable-early-ifcvt=false | FileCheck %s
-; Check that the select is expanded into a branch sequence.
+; Check that the select isn't expanded into a branch sequence
+; when the icmp's first operand %x0 is from load.
define i64 @f(i64 %a, i64 %b, i64* %c, i64 %d, i64 %e) {
- ; CHECK: cbz
+ ; CHECK: csel
%x0 = load i64, i64* %c
%x1 = icmp eq i64 %x0, 0
%x2 = select i1 %x1, i64 %a, i64 %b
diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
index 5eb455f3a22c..d12c4c6f9fae 100644
--- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
+++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -44,11 +44,10 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!36, !37}
!llvm.ident = !{!38}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, variables: !12)
+!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 141, file: !1, scope: !1, type: !6, variables: !12)
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
diff --git a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
new file mode 100644
index 000000000000..73200b581585
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=arm64
+; Make sure we are not crashing on this test.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @extern(i8*)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
+
+; Function Attrs: nounwind
+define void @func(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 {
+bb:
+ %tmp = getelementptr inbounds i8, i8* %arg2, i64 88
+ tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false)
+ store i8 0, i8* %arg3
+ store i8 2, i8* %arg2
+ store float 0.000000e+00, float* %arg
+ %tmp4 = bitcast i8* %tmp to <4 x float>*
+ store volatile <4 x float> zeroinitializer, <4 x float>* %tmp4
+ store i32 5, i32* %arg1
+ tail call void @extern(i8* %tmp)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @func2(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 {
+bb:
+ %tmp = getelementptr inbounds i8, i8* %arg2, i64 88
+ tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false)
+ store i8 0, i8* %arg3
+ store i8 2, i8* %arg2
+ store float 0.000000e+00, float* %arg
+ %tmp4 = bitcast i8* %tmp to <4 x float>*
+ store <4 x float> zeroinitializer, <4 x float>* %tmp4
+ store i32 5, i32* %arg1
+ tail call void @extern(i8* %tmp)
+ ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind "target-cpu"="cortex-a53" }
diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
index b0e9d4aa7703..29b71e042611 100644
--- a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
+++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -1,7 +1,13 @@
-; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN
-; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD
-; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
-; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
+
+; The following tests use the balance-fp-ops feature, and should be independent of
+; the target cpu.
+
+; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
+; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP
; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
@@ -75,7 +81,7 @@ entry:
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
-; CHECK-A57: stp [[x]], [[y]]
+; CHECK-BALFP: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]
@@ -170,7 +176,7 @@ declare void @g(...) #1
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
-; CHECK-A57: stp [[x]], [[y]]
+; CHECK-BALFP: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]
diff --git a/test/CodeGen/AArch64/aarch64-be-bv.ll b/test/CodeGen/AArch64/aarch64-be-bv.ll
index fb41156c09df..163a86b9ae4c 100644
--- a/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: movi_modimm_t1:
define i16 @movi_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -17,7 +17,7 @@ define i16 @movi_modimm_t1() nounwind {
; CHECK-LABEL: movi_modimm_t2:
define i16 @movi_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -29,7 +29,7 @@ define i16 @movi_modimm_t2() nounwind {
; CHECK-LABEL: movi_modimm_t3:
define i16 @movi_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -41,7 +41,7 @@ define i16 @movi_modimm_t3() nounwind {
; CHECK-LABEL: movi_modimm_t4:
define i16 @movi_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #24
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -53,7 +53,7 @@ define i16 @movi_modimm_t4() nounwind {
; CHECK-LABEL: movi_modimm_t5:
define i16 @movi_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -65,7 +65,7 @@ define i16 @movi_modimm_t5() nounwind {
; CHECK-LABEL: movi_modimm_t6:
define i16 @movi_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -77,7 +77,7 @@ define i16 @movi_modimm_t6() nounwind {
; CHECK-LABEL: movi_modimm_t7:
define i16 @movi_modimm_t7() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #8
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, msl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -89,7 +89,7 @@ define i16 @movi_modimm_t7() nounwind {
; CHECK-LABEL: movi_modimm_t8:
define i16 @movi_modimm_t8() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #16
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, msl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -101,7 +101,7 @@ define i16 @movi_modimm_t8() nounwind {
; CHECK-LABEL: movi_modimm_t9:
define i16 @movi_modimm_t9() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #0x1
+ ; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -149,7 +149,7 @@ define i16 @fmov_modimm_t12() nounwind {
; CHECK-LABEL: mvni_modimm_t1:
define i16 @mvni_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -161,7 +161,7 @@ define i16 @mvni_modimm_t1() nounwind {
; CHECK-LABEL: mvni_modimm_t2:
define i16 @mvni_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -173,7 +173,7 @@ define i16 @mvni_modimm_t2() nounwind {
; CHECK-LABEL: mvni_modimm_t3:
define i16 @mvni_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -185,7 +185,7 @@ define i16 @mvni_modimm_t3() nounwind {
; CHECK-LABEL: mvni_modimm_t4:
define i16 @mvni_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #24
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -197,7 +197,7 @@ define i16 @mvni_modimm_t4() nounwind {
; CHECK-LABEL: mvni_modimm_t5:
define i16 @mvni_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -209,7 +209,7 @@ define i16 @mvni_modimm_t5() nounwind {
; CHECK-LABEL: mvni_modimm_t6:
define i16 @mvni_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -221,7 +221,7 @@ define i16 @mvni_modimm_t6() nounwind {
; CHECK-LABEL: mvni_modimm_t7:
define i16 @mvni_modimm_t7() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #8
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, msl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -233,7 +233,7 @@ define i16 @mvni_modimm_t7() nounwind {
; CHECK-LABEL: mvni_modimm_t8:
define i16 @mvni_modimm_t8() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #16
+ ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, msl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -245,7 +245,7 @@ define i16 @mvni_modimm_t8() nounwind {
; CHECK-LABEL: bic_modimm_t1:
define i16 @bic_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1
+ ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
@@ -256,7 +256,7 @@ define i16 @bic_modimm_t1() nounwind {
; CHECK-LABEL: bic_modimm_t2:
define i16 @bic_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+ ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
@@ -267,7 +267,7 @@ define i16 @bic_modimm_t2() nounwind {
; CHECK-LABEL: bic_modimm_t3:
define i16 @bic_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+ ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1, lsl #16
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
@@ -278,7 +278,7 @@ define i16 @bic_modimm_t3() nounwind {
; CHECK-LABEL: bic_modimm_t4:
define i16 @bic_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+ ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1, lsl #24
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
@@ -289,7 +289,7 @@ define i16 @bic_modimm_t4() nounwind {
; CHECK-LABEL: bic_modimm_t5:
define i16 @bic_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #0x1
+ ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
@@ -300,7 +300,7 @@ define i16 @bic_modimm_t5() nounwind {
; CHECK-LABEL: bic_modimm_t6:
define i16 @bic_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+ ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
@@ -311,7 +311,7 @@ define i16 @bic_modimm_t6() nounwind {
; CHECK-LABEL: orr_modimm_t1:
define i16 @orr_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1
+ ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
@@ -322,7 +322,7 @@ define i16 @orr_modimm_t1() nounwind {
; CHECK-LABEL: orr_modimm_t2:
define i16 @orr_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+ ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
@@ -333,7 +333,7 @@ define i16 @orr_modimm_t2() nounwind {
; CHECK-LABEL: orr_modimm_t3:
define i16 @orr_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+ ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1, lsl #16
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
@@ -344,7 +344,7 @@ define i16 @orr_modimm_t3() nounwind {
; CHECK-LABEL: orr_modimm_t4:
define i16 @orr_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+ ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1, lsl #24
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
@@ -355,7 +355,7 @@ define i16 @orr_modimm_t4() nounwind {
; CHECK-LABEL: orr_modimm_t5:
define i16 @orr_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #0x1
+ ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -366,7 +366,7 @@ define i16 @orr_modimm_t5() nounwind {
; CHECK-LABEL: orr_modimm_t6:
define i16 @orr_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
- ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+ ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
@@ -385,37 +385,37 @@ declare i64 @f_v2i64(<2 x i64> %arg)
; CHECK-LABEL: modimm_t1_call:
define void @modimm_t1_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 8, i8 0, i8 0, i8 0, i8 8, i8 0, i8 0, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #7
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 7, i16 0, i16 7, i16 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #6
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 6, i32 6>)
- ; CHECK: movi v{{[0-9]+}}.2s, #0x5
+ ; CHECK: movi v{{[0-9]+}}.2s, #5
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 21474836485>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #5
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #4
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 4, i16 0, i16 4, i16 0, i16 4, i16 0, i16 4, i16 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #3
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 3, i32 3, i32 3, i32 3>)
- ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2
+ ; CHECK: movi v[[REG:[0-9]+]].4s, #2
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 8589934594, i64 8589934594>)
@@ -425,37 +425,37 @@ define void @modimm_t1_call() {
; CHECK-LABEL: modimm_t2_call:
define void @modimm_t2_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #8, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 8, i8 0, i8 0, i8 0, i8 8, i8 0, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #7, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 1792, i16 0, i16 1792, i16 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #6, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 1536, i32 1536>)
- ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #8
+ ; CHECK: movi v{{[0-9]+}}.2s, #5, lsl #8
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 5497558140160>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #5, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #4, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #3, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 768, i32 768, i32 768, i32 768>)
- ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #8
+ ; CHECK: movi v[[REG:[0-9]+]].4s, #2, lsl #8
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 2199023256064, i64 2199023256064>)
@@ -465,37 +465,37 @@ define void @modimm_t2_call() {
; CHECK-LABEL: modimm_t3_call:
define void @modimm_t3_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #8, lsl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 8, i8 0, i8 0, i8 0, i8 8, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #7, lsl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 0, i16 7, i16 0, i16 7>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #6, lsl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 393216, i32 393216>)
- ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #16
+ ; CHECK: movi v{{[0-9]+}}.2s, #5, lsl #16
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 1407374883880960>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #5, lsl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #4, lsl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 4, i16 0, i16 4, i16 0, i16 4, i16 0, i16 4>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #3, lsl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 196608, i32 196608, i32 196608, i32 196608>)
- ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #16
+ ; CHECK: movi v[[REG:[0-9]+]].4s, #2, lsl #16
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 562949953552384, i64 562949953552384>)
@@ -505,37 +505,37 @@ define void @modimm_t3_call() {
; CHECK-LABEL: modimm_t4_call:
define void @modimm_t4_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #24
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #8, lsl #24
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 0, i8 8, i8 0, i8 0, i8 0, i8 8>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #24
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #7, lsl #24
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 0, i16 1792, i16 0, i16 1792>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #24
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #6, lsl #24
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 100663296, i32 100663296>)
- ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #24
+ ; CHECK: movi v{{[0-9]+}}.2s, #5, lsl #24
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 360287970273525760>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #24
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #5, lsl #24
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #24
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #4, lsl #24
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #24
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #3, lsl #24
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 50331648, i32 50331648, i32 50331648, i32 50331648>)
- ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #24
+ ; CHECK: movi v[[REG:[0-9]+]].4s, #2, lsl #24
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 144115188109410304, i64 144115188109410304>)
@@ -545,37 +545,37 @@ define void @modimm_t4_call() {
; CHECK-LABEL: modimm_t5_call:
define void @modimm_t5_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8
+ ; CHECK: movi v[[REG1:[0-9]+]].4h, #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 8, i8 0, i8 8, i8 0, i8 8, i8 0, i8 8, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7
+ ; CHECK: movi v[[REG1:[0-9]+]].4h, #7
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 7, i16 7, i16 7, i16 7>)
- ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6
+ ; CHECK: movi v[[REG1:[0-9]+]].4h, #6
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 393222, i32 393222>)
- ; CHECK: movi v{{[0-9]+}}.4h, #0x5
+ ; CHECK: movi v{{[0-9]+}}.4h, #5
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 1407396358717445>)
- ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5
+ ; CHECK: movi v[[REG1:[0-9]+]].8h, #5
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4
+ ; CHECK: movi v[[REG1:[0-9]+]].8h, #4
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>)
- ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3
+ ; CHECK: movi v[[REG1:[0-9]+]].8h, #3
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 196611, i32 196611, i32 196611, i32 196611>)
- ; CHECK: movi v[[REG:[0-9]+]].8h, #0x2
+ ; CHECK: movi v[[REG:[0-9]+]].8h, #2
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 562958543486978, i64 562958543486978>)
@@ -585,37 +585,37 @@ define void @modimm_t5_call() {
; CHECK-LABEL: modimm_t6_call:
define void @modimm_t6_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4h, #8, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 8, i8 0, i8 8, i8 0, i8 8, i8 0, i8 8>)
- ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4h, #7, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 1792, i16 1792, i16 1792, i16 1792>)
- ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4h, #6, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 100664832, i32 100664832>)
- ; CHECK: movi v{{[0-9]+}}.4h, #0x5, lsl #8
+ ; CHECK: movi v{{[0-9]+}}.4h, #5, lsl #8
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 360293467831665920>)
- ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].8h, #5, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5>)
- ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].8h, #4, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>)
- ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3, lsl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].8h, #3, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 50332416, i32 50332416, i32 50332416, i32 50332416>)
- ; CHECK: movi v[[REG:[0-9]+]].8h, #0x2, lsl #8
+ ; CHECK: movi v[[REG:[0-9]+]].8h, #2, lsl #8
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 144117387132666368, i64 144117387132666368>)
@@ -625,37 +625,37 @@ define void @modimm_t6_call() {
; CHECK-LABEL: modimm_t7_call:
define void @modimm_t7_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #8, msl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 255, i8 8, i8 0, i8 0, i8 255, i8 8, i8 0, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #7, msl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 2047, i16 0, i16 2047, i16 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #6, msl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 1791, i32 1791>)
- ; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #8
+ ; CHECK: movi v{{[0-9]+}}.2s, #5, msl #8
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 6592774800895>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #5, msl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #4, msl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1279, i16 0, i16 1279, i16 0, i16 1279, i16 0, i16 1279, i16 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #8
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #3, msl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 1023, i32 1023, i32 1023, i32 1023>)
- ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #8
+ ; CHECK: movi v[[REG:[0-9]+]].4s, #2, msl #8
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 3294239916799, i64 3294239916799>)
@@ -665,37 +665,37 @@ define void @modimm_t7_call() {
; CHECK-LABEL: modimm_t8_call:
define void @modimm_t8_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #8, msl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 255, i8 255, i8 8, i8 0, i8 255, i8 255, i8 8, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #7, msl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 65535, i16 7, i16 65535, i16 7>)
- ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].2s, #6, msl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 458751, i32 458751>)
- ; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #16
+ ; CHECK: movi v{{[0-9]+}}.2s, #5, msl #16
; CHECK-NEXT: bl f_v1i64
call i64 @f_v1i64(<1 x i64> <i64 1688845565689855>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #5, msl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #4, msl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 65535, i16 4, i16 65535, i16 4, i16 65535, i16 4, i16 65535, i16 4>)
- ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #16
+ ; CHECK: movi v[[REG1:[0-9]+]].4s, #3, msl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 262143, i32 262143, i32 262143, i32 262143>)
- ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #16
+ ; CHECK: movi v[[REG:[0-9]+]].4s, #2, msl #16
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v2i64
call i64 @f_v2i64(<2 x i64> <i64 844420635361279, i64 844420635361279>)
@@ -705,29 +705,29 @@ define void @modimm_t8_call() {
; CHECK-LABEL: modimm_t9_call:
define void @modimm_t9_call() {
- ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x8
+ ; CHECK: movi v[[REG1:[0-9]+]].8b, #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
- ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x7
+ ; CHECK: movi v[[REG1:[0-9]+]].8b, #7
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 1799, i16 1799, i16 1799, i16 1799>)
- ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x6
+ ; CHECK: movi v[[REG1:[0-9]+]].8b, #6
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 101058054, i32 101058054>)
- ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x5
+ ; CHECK: movi v[[REG1:[0-9]+]].16b, #5
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>)
- ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x4
+ ; CHECK: movi v[[REG1:[0-9]+]].16b, #4
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>)
- ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x3
+ ; CHECK: movi v[[REG1:[0-9]+]].16b, #3
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
diff --git a/test/CodeGen/AArch64/aarch64-deferred-spilling.ll b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
deleted file mode 100644
index 7accdced7d44..000000000000
--- a/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
+++ /dev/null
@@ -1,514 +0,0 @@
-;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
-;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
-
-; Check that we do not end up with useless spill code.
-;
-; Move to the basic block we are interested in.
-;
-; CHECK: // %if.then.120
-;
-; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
-; Check that w21 wouldn't need to be spilled since it is never reused.
-; REGULAR-NOT: {{[wx]}}21{{,?}}
-;
-; Check that w22 is used to carry a value through the call.
-; DEFERRED-NOT: str {{[wx]}}22,
-; DEFERRED: mov {{[wx]}}22,
-; DEFERRED-NOT: str {{[wx]}}22,
-;
-; CHECK: bl fprintf
-;
-; DEFERRED-NOT: ldr {{[wx]}}22,
-; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
-; DEFERRED-NOT: ldr {{[wx]}}22,
-;
-; REGULAR-NOT: {{[wx]}}21{{,?}}
-; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
-;
-; End of the basic block we are interested in.
-; CHECK: b
-; CHECK: {{[^:]+}}: // %sw.bb.123
-
-%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.__sbuf = type { i8*, i64 }
-%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
-%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* }
-
-@__sF = external global [0 x %struct.__sFILE], align 8
-@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1
-
-declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...)
-
-declare void @bar(i32)
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-
-define i32 @foo(%struct.DState* %s) {
-entry:
- %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1
- %tmp = load i32, i32* %state, align 4
- %cmp = icmp eq i32 %tmp, 10
- %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40
- br i1 %cmp, label %if.end.thread, label %if.end
-
-if.end.thread: ; preds = %entry
- %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
- %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
- %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
- %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
- %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
- %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
- %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
- %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
- %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
- %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
- %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
- %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
- %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
- %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
- %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
- %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
- %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
- %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
- %tmp1 = bitcast i32* %save_i to i8*
- call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false)
- br label %sw.default
-
-if.end: ; preds = %entry
- %.pre = load i32, i32* %save_i, align 4
- %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
- %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4
- %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
- %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4
- %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
- %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4
- %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
- %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4
- %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
- %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4
- %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
- %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4
- %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
- %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4
- %save_groupPos10.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
- %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4
- %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
- %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4
- %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
- %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4
- %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
- %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4
- %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
- %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4
- %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
- %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4
- %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
- %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4
- %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
- %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4
- %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
- %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4
- %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
- %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4
- %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
- %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4
- switch i32 %tmp, label %sw.default [
- i32 13, label %sw.bb
- i32 14, label %if.end.sw.bb.65_crit_edge
- i32 25, label %if.end.sw.bb.123_crit_edge
- ]
-
-if.end.sw.bb.123_crit_edge: ; preds = %if.end
- %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
- br label %sw.bb.123
-
-if.end.sw.bb.65_crit_edge: ; preds = %if.end
- %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
- %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4
- br label %sw.bb.65
-
-sw.bb: ; preds = %if.end
- %sunkaddr = ptrtoint %struct.DState* %s to i64
- %sunkaddr485 = add i64 %sunkaddr, 8
- %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32*
- store i32 13, i32* %sunkaddr486, align 4
- %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
- %tmp2 = load i32, i32* %bsLive, align 4
- %cmp28.400 = icmp sgt i32 %tmp2, 7
- br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph
-
-sw.bb.if.then.29_crit_edge: ; preds = %sw.bb
- %sunkaddr487 = ptrtoint %struct.DState* %s to i64
- %sunkaddr488 = add i64 %sunkaddr487, 32
- %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32*
- %.pre425 = load i32, i32* %sunkaddr489, align 4
- br label %if.then.29
-
-if.end.33.lr.ph: ; preds = %sw.bb
- %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream**
- %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8
- %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1
- %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4
- %tmp4 = add i32 %.pre430, -1
- br label %if.end.33
-
-if.then.29: ; preds = %while.body.backedge, %sw.bb.if.then.29_crit_edge
- %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ]
- %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ]
- %sub = add nsw i32 %.lcssa393, -8
- %shr = lshr i32 %tmp5, %sub
- %and = and i32 %shr, 255
- %sunkaddr491 = ptrtoint %struct.DState* %s to i64
- %sunkaddr492 = add i64 %sunkaddr491, 36
- %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32*
- store i32 %sub, i32* %sunkaddr493, align 4
- %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9
- store i32 %and, i32* %blockSize100k, align 4
- %and.off = add nsw i32 %and, -49
- %tmp6 = icmp ugt i32 %and.off, 8
- br i1 %tmp6, label %save_state_and_return, label %if.end.62
-
-if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph
- %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ]
- %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ]
- %cmp35 = icmp eq i32 %lsr.iv482, -1
- br i1 %cmp35, label %save_state_and_return, label %if.end.37
-
-if.end.37: ; preds = %if.end.33
- %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8**
- %sunkaddr494 = ptrtoint %struct.DState* %s to i64
- %sunkaddr495 = add i64 %sunkaddr494, 32
- %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32*
- %tmp9 = load i32, i32* %sunkaddr496, align 4
- %shl = shl i32 %tmp9, 8
- %tmp10 = load i8*, i8** %tmp8, align 8
- %tmp11 = load i8, i8* %tmp10, align 1
- %conv = zext i8 %tmp11 to i32
- %or = or i32 %conv, %shl
- store i32 %or, i32* %sunkaddr496, align 4
- %add = add nsw i32 %tmp7, 8
- %sunkaddr497 = ptrtoint %struct.DState* %s to i64
- %sunkaddr498 = add i64 %sunkaddr497, 36
- %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32*
- store i32 %add, i32* %sunkaddr499, align 4
- %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1
- store i8* %incdec.ptr, i8** %tmp8, align 8
- %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64
- %sunkaddr501 = add i64 %sunkaddr500, 8
- %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32*
- store i32 %lsr.iv482, i32* %sunkaddr502, align 4
- %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64
- %sunkaddr504 = add i64 %sunkaddr503, 12
- %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32*
- %tmp12 = load i32, i32* %sunkaddr505, align 4
- %inc = add i32 %tmp12, 1
- store i32 %inc, i32* %sunkaddr505, align 4
- %cmp49 = icmp eq i32 %inc, 0
- br i1 %cmp49, label %if.then.51, label %while.body.backedge
-
-if.then.51: ; preds = %if.end.37
- %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64
- %sunkaddr507 = add i64 %sunkaddr506, 16
- %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32*
- %tmp13 = load i32, i32* %sunkaddr508, align 4
- %inc53 = add i32 %tmp13, 1
- store i32 %inc53, i32* %sunkaddr508, align 4
- br label %while.body.backedge
-
-while.body.backedge: ; preds = %if.then.51, %if.end.37
- %lsr.iv.next483 = add i32 %lsr.iv482, -1
- %cmp28 = icmp sgt i32 %add, 7
- br i1 %cmp28, label %if.then.29, label %if.end.33
-
-if.end.62: ; preds = %if.then.29
- %sub64 = add nsw i32 %and, -48
- %sunkaddr509 = ptrtoint %struct.DState* %s to i64
- %sunkaddr510 = add i64 %sunkaddr509, 40
- %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32*
- store i32 %sub64, i32* %sunkaddr511, align 4
- br label %sw.bb.65
-
-sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge
- %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ]
- %tmp14 = phi i32 [ %.pre426, %if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ]
- %sunkaddr512 = ptrtoint %struct.DState* %s to i64
- %sunkaddr513 = add i64 %sunkaddr512, 8
- %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32*
- store i32 14, i32* %sunkaddr514, align 4
- %cmp70.397 = icmp sgt i32 %tmp14, 7
- br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph
-
-if.end.82.lr.ph: ; preds = %sw.bb.65
- %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream**
- %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8
- %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1
- %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4
- %tmp16 = add i32 %.pre431, -1
- br label %if.end.82
-
-if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65
- %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ]
- %sub76 = add nsw i32 %.lcssa390, -8
- %sunkaddr516 = ptrtoint %struct.DState* %s to i64
- %sunkaddr517 = add i64 %sunkaddr516, 36
- %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32*
- store i32 %sub76, i32* %sunkaddr518, align 4
- %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11
- %tmp17 = load i32, i32* %currBlockNo, align 4
- %inc117 = add nsw i32 %tmp17, 1
- store i32 %inc117, i32* %currBlockNo, align 4
- %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12
- %tmp18 = load i32, i32* %verbosity, align 4
- %cmp118 = icmp sgt i32 %tmp18, 1
- br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0
-
-if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph
- %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ]
- %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ]
- %cmp85 = icmp eq i32 %lsr.iv480, -1
- br i1 %cmp85, label %save_state_and_return, label %if.end.88
-
-if.end.88: ; preds = %if.end.82
- %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8**
- %sunkaddr519 = ptrtoint %struct.DState* %s to i64
- %sunkaddr520 = add i64 %sunkaddr519, 32
- %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32*
- %tmp21 = load i32, i32* %sunkaddr521, align 4
- %shl90 = shl i32 %tmp21, 8
- %tmp22 = load i8*, i8** %tmp20, align 8
- %tmp23 = load i8, i8* %tmp22, align 1
- %conv93 = zext i8 %tmp23 to i32
- %or94 = or i32 %conv93, %shl90
- store i32 %or94, i32* %sunkaddr521, align 4
- %add97 = add nsw i32 %tmp19, 8
- %sunkaddr522 = ptrtoint %struct.DState* %s to i64
- %sunkaddr523 = add i64 %sunkaddr522, 36
- %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32*
- store i32 %add97, i32* %sunkaddr524, align 4
- %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1
- store i8* %incdec.ptr100, i8** %tmp20, align 8
- %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64
- %sunkaddr526 = add i64 %sunkaddr525, 8
- %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32*
- store i32 %lsr.iv480, i32* %sunkaddr527, align 4
- %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64
- %sunkaddr529 = add i64 %sunkaddr528, 12
- %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32*
- %tmp24 = load i32, i32* %sunkaddr530, align 4
- %inc106 = add i32 %tmp24, 1
- store i32 %inc106, i32* %sunkaddr530, align 4
- %cmp109 = icmp eq i32 %inc106, 0
- br i1 %cmp109, label %if.then.111, label %while.body.68.backedge
-
-if.then.111: ; preds = %if.end.88
- %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64
- %sunkaddr532 = add i64 %sunkaddr531, 16
- %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32*
- %tmp25 = load i32, i32* %sunkaddr533, align 4
- %inc114 = add i32 %tmp25, 1
- store i32 %inc114, i32* %sunkaddr533, align 4
- br label %while.body.68.backedge
-
-while.body.68.backedge: ; preds = %if.then.111, %if.end.88
- %lsr.iv.next481 = add i32 %lsr.iv480, -1
- %cmp70 = icmp sgt i32 %add97, 7
- br i1 %cmp70, label %if.then.72, label %if.end.82
-
-if.then.120: ; preds = %if.then.72
- %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117)
- br label %sw.bb.123
-
-sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge
- %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ]
- %sunkaddr534 = ptrtoint %struct.DState* %s to i64
- %sunkaddr535 = add i64 %sunkaddr534, 8
- %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32*
- store i32 25, i32* %sunkaddr536, align 4
- %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4
- %cmp128.395 = icmp sgt i32 %tmp26, 7
- br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph
-
-sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123
- %sunkaddr537 = ptrtoint %struct.DState* %s to i64
- %sunkaddr538 = add i64 %sunkaddr537, 32
- %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32*
- %.pre429 = load i32, i32* %sunkaddr539, align 4
- br label %if.then.130
-
-if.end.140.lr.ph: ; preds = %sw.bb.123
- %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream**
- %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8
- %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1
- %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4
- %tmp28 = add i32 %.pre432, -1
- br label %if.end.140
-
-if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge
- %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ]
- %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ]
- %sub134 = add nsw i32 %.lcssa, -8
- %shr135 = lshr i32 %tmp29, %sub134
- store i32 %sub134, i32* %bsLive127.pre-phi, align 4
- %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13
- %tmp30 = load i32, i32* %origPtr, align 4
- %shl175 = shl i32 %tmp30, 8
- %conv176 = and i32 %shr135, 255
- %or177 = or i32 %shl175, %conv176
- store i32 %or177, i32* %origPtr, align 4
- %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27
- %tmp31 = load i32, i32* %nInUse, align 4
- %add179 = add nsw i32 %tmp31, 2
- br label %save_state_and_return
-
-if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph
- %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ]
- %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ]
- %cmp143 = icmp eq i32 %lsr.iv, -1
- br i1 %cmp143, label %save_state_and_return, label %if.end.146
-
-if.end.146: ; preds = %if.end.140
- %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8**
- %sunkaddr541 = ptrtoint %struct.DState* %s to i64
- %sunkaddr542 = add i64 %sunkaddr541, 32
- %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32*
- %tmp34 = load i32, i32* %sunkaddr543, align 4
- %shl148 = shl i32 %tmp34, 8
- %tmp35 = load i8*, i8** %tmp33, align 8
- %tmp36 = load i8, i8* %tmp35, align 1
- %conv151 = zext i8 %tmp36 to i32
- %or152 = or i32 %conv151, %shl148
- store i32 %or152, i32* %sunkaddr543, align 4
- %add155 = add nsw i32 %tmp32, 8
- store i32 %add155, i32* %bsLive127.pre-phi, align 4
- %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1
- store i8* %incdec.ptr158, i8** %tmp33, align 8
- %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64
- %sunkaddr545 = add i64 %sunkaddr544, 8
- %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32*
- store i32 %lsr.iv, i32* %sunkaddr546, align 4
- %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64
- %sunkaddr548 = add i64 %sunkaddr547, 12
- %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32*
- %tmp37 = load i32, i32* %sunkaddr549, align 4
- %inc164 = add i32 %tmp37, 1
- store i32 %inc164, i32* %sunkaddr549, align 4
- %cmp167 = icmp eq i32 %inc164, 0
- br i1 %cmp167, label %if.then.169, label %while.body.126.backedge
-
-if.then.169: ; preds = %if.end.146
- %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64
- %sunkaddr551 = add i64 %sunkaddr550, 16
- %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32*
- %tmp38 = load i32, i32* %sunkaddr552, align 4
- %inc172 = add i32 %tmp38, 1
- store i32 %inc172, i32* %sunkaddr552, align 4
- br label %while.body.126.backedge
-
-while.body.126.backedge: ; preds = %if.then.169, %if.end.146
- %lsr.iv.next = add i32 %lsr.iv, -1
- %cmp128 = icmp sgt i32 %add155, 7
- br i1 %cmp128, label %if.then.130, label %if.end.140
-
-sw.default: ; preds = %if.end, %if.end.thread
- %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ]
- %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ]
- %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ]
- %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ]
- %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ]
- %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ]
- %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ]
- %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ]
- %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ]
- %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ]
- %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ]
- %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ]
- %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ]
- %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ]
- %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ]
- %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ]
- %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ]
- %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ]
- %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ]
- %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ]
- %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ]
- %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ]
- %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ]
- %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ]
- %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ]
- %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ]
- %save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ]
- %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ]
- %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ]
- %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ]
- %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ]
- %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ]
- %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ]
- %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ]
- %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ]
- %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ]
- %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ]
- tail call void @bar(i32 4001)
- br label %save_state_and_return
-
-save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29
- %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ]
- %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ]
- %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ]
- %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ]
- %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ]
- %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ]
- %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ]
- %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ]
- %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ]
- %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ]
- %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ]
- %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ]
- %tmp70 = phi i32 [ %tmp53, %sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ]
- %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ]
- %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ]
- %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ]
- %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ]
- %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ]
- %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ]
- %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ]
- %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ]
- %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ]
- %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ]
- %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ]
- %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ]
- %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ %save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ]
- %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ]
- %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ]
- %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ]
- %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ]
- %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ]
- %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ]
- %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ]
- %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ]
- %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ]
- %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ]
- %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ]
- %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], [ 0, %if.end.33 ]
- store i32 %tmp58, i32* %save_i, align 4
- store i32 %tmp59, i32* %save_j3.pre-phi468, align 4
- store i32 %tmp60, i32* %save_t4.pre-phi466, align 4
- store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4
- store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4
- store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4
- store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4
- store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4
- store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4
- store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4
- store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4
- store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4
- store i32 %tmp68, i32* %save_es14.pre-phi446, align 4
- store i32 %tmp69, i32* %save_N15.pre-phi444, align 4
- store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4
- store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4
- store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4
- store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4
- store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4
- ret i32 %retVal.0
-}
-
-!0 = !{!"branch_weights", i32 10, i32 1}
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 90093f94d0ad..708ae083eb86 100644
--- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-fp-elim -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
@@ -97,27 +98,47 @@ entry:
; CHECK-LABEL: novla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
-; Check that the frame pointer is created:
-; CHECK: stp x29, x30, [sp, #16]
-; CHECK: add x29, sp, #16
+; CHECK: sub sp, sp, #32
+; CHECK: stp x19, x30, [sp, #16]
; Check correctness of cfi pseudo-instructions
-; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_def_cfa_offset 32
; CHECK: .cfi_offset w30, -8
-; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
-; Check correct access to arguments passed on the stack, through frame pointer
-; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
-; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: .cfi_offset w19, -16
+; Check correct access to arguments passed on the stack, through stack pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [sp, #56]
+; CHECK: ldr w[[IARG:[0-9]+]], [sp, #40]
; Check correct access to local variable on the stack, through stack pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
-; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x19, x30, [sp, #16]
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: sub sp, sp, #48
+; CHECK-MACHO: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #32]
+; CHECK-MACHO: add x29, sp, #32
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK-MACHO: ldp x29, x30, [sp, #32]
+; CHECK-MACHO: ldp x20, x19, [sp, #16]
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
declare i32 @g() #0
@@ -159,7 +180,7 @@ entry:
; CHECK-LABEL: novla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -170,8 +191,7 @@ entry:
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@@ -181,10 +201,39 @@ entry:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -336,7 +385,7 @@ entry:
; CHECK-LABEL: vla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: str x21, [sp, #-48]!
; CHECK: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #32]
@@ -354,8 +403,7 @@ entry:
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
-; CHECK: .cfi_offset w21, -40
-; CHECK: .cfi_offset w22, -48
+; CHECK: .cfi_offset w21, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
@@ -376,10 +424,57 @@ entry:
; CHECK: sub sp, x29, #32
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
-; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ldr x21, [sp], #48
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x22, x21, [sp, #-48]!
+; CHECK-MACHO: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #32]
+; CHECK-MACHO: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #80
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; CHECK-MACHO: .cfi_offset w21, -40
+; CHECK-MACHO: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #32
+; CHECK-MACHO: ldp x29, x30, [sp, #32]
+; CHECK-MACHO: ldp x20, x19, [sp, #16]
+; CHECK-MACHO: ldp x22, x21, [sp], #48
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -398,7 +493,7 @@ entry:
; CHECK-LABEL: vla_dynamicrealign_nocall
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -428,9 +523,44 @@ entry:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -449,7 +579,7 @@ entry:
; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -479,9 +609,44 @@ entry:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #7, lsl #12
+; CHECK-MACHO: and sp, x9, #0xffffffffffff8000
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
define void @realign_conditional(i1 %b) {
entry:
@@ -509,7 +674,7 @@ bb1:
define void @realign_conditional2(i1 %b) {
entry:
- %tmp = alloca i8, i32 4
+ %tmp = alloca i8, i32 16
br i1 %b, label %bb0, label %bb1
bb0:
@@ -522,18 +687,18 @@ bb1:
; CHECK-LABEL: realign_conditional2
; Extra realignment in the prologue (performance issue).
+; CHECK: tbz {{.*}} .[[LABEL:.*]]
; CHECK: sub x9, sp, #32 // =32
; CHECK: and sp, x9, #0xffffffffffffffe0
; CHECK: mov x19, sp
-; CHECK: tbz {{.*}} .[[LABEL:.*]]
; Stack is realigned in a non-entry BB.
; CHECK: sub [[REG:x[01-9]+]], sp, #64
; CHECK: and sp, [[REG]], #0xffffffffffffffe0
; CHECK: .[[LABEL]]:
; CHECK: ret
-attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 93e2ff14ac71..cae00a9b1cb3 100644
--- a/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,6 +1,9 @@
; RUN: llc -O3 -aarch64-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -O3 -aarch64-gep-opt=true -mattr=-use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -aarch64-gep-opt=true -mattr=+use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnueabi"
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
new file mode 100644
index 000000000000..8628c4288c69
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
@@ -0,0 +1,86 @@
+; RUN: opt < %s -mtriple=aarch64 -interleaved-access -S | FileCheck %s
+
+; CHECK-LABEL: @extract_user_basic(
+; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 1
+define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
+entry:
+ %L = load <8 x i32>, <8 x i32>* %A, align 8
+ %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ br i1 %C, label %if.then, label %if.merge
+
+if.then:
+ %E = extractelement <8 x i32> %L, i32 2
+ br label %if.merge
+
+if.merge:
+ ret void
+}
+
+; CHECK-LABEL: @extract_user_multi(
+; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 1
+define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
+entry:
+ %L = load <8 x i32>, <8 x i32>* %A, align 8
+ %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ br i1 %C, label %if.then, label %if.merge
+
+if.then:
+ %E1 = extractelement <8 x i32> %L, i32 0
+ br label %if.merge
+
+if.merge:
+ %E2 = extractelement <8 x i32> %L, i32 2
+ ret void
+}
+
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
+entry:
+ %L = load <8 x i32>, <8 x i32>* %A, align 8
+ %E1 = extractelement <8 x i32> %L, i32 0
+ br i1 %C, label %if.then, label %if.merge
+
+if.then:
+ %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %E2 = extractelement <8 x i32> %L, i32 2
+ br label %if.merge
+
+if.merge:
+ ret void
+}
+
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_wrong_const_index(<8 x i32>* %A) {
+entry:
+ %L = load <8 x i32>, <8 x i32>* %A, align 8
+ %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %E = extractelement <8 x i32> %L, i32 1
+ ret void
+}
+
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_undef_index(<8 x i32>* %A) {
+entry:
+ %L = load <8 x i32>, <8 x i32>* %A, align 8
+ %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %E = extractelement <8 x i32> %L, i32 undef
+ ret void
+}
+
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
+entry:
+ %L = load <8 x i32>, <8 x i32>* %A, align 8
+ %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %E = extractelement <8 x i32> %L, i32 %I
+ ret void
+}
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
index 1bc2a3ccb1ca..845050156baa 100644
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
@@ -268,3 +268,15 @@ define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
store <3 x float> %tmp1, <3 x float>* %p, align 16
ret void
}
+
+; NEON-LABEL: load_factor2_with_extract_user:
+; NEON: ld2 { v0.4s, v1.4s }, [x0]
+; NEON: mov w0, v0.s[1]
+; NONEON-LABEL: load_factor2_with_extract_user:
+; NONEON-NOT: ld2
+define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
+ %1 = load <8 x i32>, <8 x i32>* %a, align 8
+ %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = extractelement <8 x i32> %1, i32 2
+ ret i32 %3
+}
diff --git a/test/CodeGen/AArch64/aarch64-smull.ll b/test/CodeGen/AArch64/aarch64-smull.ll
index ec0e2de92d0d..1c8d13a00b2a 100644
--- a/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/test/CodeGen/AArch64/aarch64-smull.ll
@@ -234,7 +234,7 @@ define <8 x i16> @smull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: smull_noextvec_v8i8_v8i16:
-; CHECK: movz
+; CHECK: mov
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = sext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
@@ -268,7 +268,7 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK: movz
+; CHECK: mov
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
diff --git a/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/test/CodeGen/AArch64/aarch64-stp-cluster.ll
new file mode 100644
index 000000000000..5cab38eafb52
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-stp-cluster.ll
@@ -0,0 +1,149 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i64_scale:BB#0
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(4): STRXui %vreg1, %vreg0, 1
+; CHECK:SU(3): STRXui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRXui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRXui %vreg1, %vreg0, 4
+define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
+ store i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
+ store i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
+ store i64 %v, i64* %arrayidx3
+ ret i64 %v
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i32_scale:BB#0
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(4): STRWui %vreg1, %vreg0, 1
+; CHECK:SU(3): STRWui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRWui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRWui %vreg1, %vreg0, 4
+define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %P, i32 3
+ store i32 %v, i32* %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2
+ store i32 %v, i32* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1
+ store i32 %v, i32* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4
+ store i32 %v, i32* %arrayidx3
+ ret i32 %v
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_i64_unscale:BB#0 entry
+; CHECK:Cluster ld/st SU(5) - SU(2)
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:SU(5): STURXi %vreg1, %vreg0, -32
+; CHECK:SU(2): STURXi %vreg1, %vreg0, -24
+; CHECK:SU(4): STURXi %vreg1, %vreg0, -16
+; CHECK:SU(3): STURXi %vreg1, %vreg0, -8
+define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
+ store i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
+ store i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
+ store i64 %v, i64* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_i32_unscale:BB#0 entry
+; CHECK:Cluster ld/st SU(5) - SU(2)
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:SU(5): STURWi %vreg1, %vreg0, -16
+; CHECK:SU(2): STURWi %vreg1, %vreg0, -12
+; CHECK:SU(4): STURWi %vreg1, %vreg0, -8
+; CHECK:SU(3): STURWi %vreg1, %vreg0, -4
+define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
+ store i32 %v, i32* %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
+ store i32 %v, i32* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
+ store i32 %v, i32* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
+ store i32 %v, i32* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_double:BB#0
+; CHECK:Cluster ld/st SU(3) - SU(4)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(3): STRDui %vreg1, %vreg0, 1
+; CHECK:SU(4): STRDui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRDui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRDui %vreg1, %vreg0, 4
+define void @stp_double(double* nocapture %P, double %v) {
+entry:
+ %arrayidx = getelementptr inbounds double, double* %P, i64 3
+ store double %v, double* %arrayidx
+ %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
+ store double %v, double* %arrayidx1
+ %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
+ store double %v, double* %arrayidx2
+ %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
+ store double %v, double* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_float:BB#0
+; CHECK:Cluster ld/st SU(3) - SU(4)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(3): STRSui %vreg1, %vreg0, 1
+; CHECK:SU(4): STRSui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRSui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRSui %vreg1, %vreg0, 4
+define void @stp_float(float* nocapture %P, float %v) {
+entry:
+ %arrayidx = getelementptr inbounds float, float* %P, i64 3
+ store float %v, float* %arrayidx
+ %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
+ store float %v, float* %arrayidx1
+ %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
+ store float %v, float* %arrayidx2
+ %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
+ store float %v, float* %arrayidx3
+ ret void
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_volatile:BB#0
+; CHECK-NOT: Cluster ld/st
+; CHECK:SU(2): STRXui %vreg1, %vreg0, 3; mem:Volatile
+; CHECK:SU(3): STRXui %vreg1, %vreg0, 2; mem:Volatile
+; CHECK:SU(4): STRXui %vreg1, %vreg0, 1; mem:Volatile
+; CHECK:SU(5): STRXui %vreg1, %vreg0, 4; mem:Volatile
+define i64 @stp_volatile(i64* nocapture %P, i64 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
+ store volatile i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
+ store volatile i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
+ store volatile i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
+ store volatile i64 %v, i64* %arrayidx3
+ ret i64 %v
+}
+
diff --git a/test/CodeGen/AArch64/aarch64-tbz.ll b/test/CodeGen/AArch64/aarch64-tbz.ll
new file mode 100644
index 000000000000..f4ebcc70674b
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-tbz.ll
@@ -0,0 +1,98 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnueabi < %s | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB0_3}}
+; CHECK: tbz w[[REG1:[0-9]+]], #2, {{.LBB0_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]], #0x4
+; CHECK-NOT: cbz [[REG2]], {{.LBB0_3}}
+
+; CHECK: b
+define void @test1(i64 %A, i64 %B) {
+entry:
+ %and = and i64 %A, 4
+ %notlhs = icmp eq i64 %and, 0
+ %and.1 = and i64 %B, 8
+ %0 = icmp eq i64 %and.1, 0
+ %1 = or i1 %0, %notlhs
+ br i1 %1, label %if.end3, label %if.then2
+
+if.then2: ; preds = %entry
+ tail call void @foo(i64 %A, i64 %B)
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %entry
+ ret void
+}
+
+; CHECK-LABEL: test2
+; CHECK: cbz {{x[0-9]}}, {{.LBB1_3}}
+; CHECK: tbz w[[REG1:[0-9]+]], #3, {{.LBB1_3}}
+; CHECK-NOT: and [REG2:x[0-9]+], x[[REG1]], #0x08
+; CHECK-NOT: cbz [[REG2]], {{.LBB1_3}}
+
+define void @test2(i64 %A, i64* readonly %B) #0 {
+entry:
+ %tobool = icmp eq i64* %B, null
+ %and = and i64 %A, 8
+ %tobool1 = icmp eq i64 %and, 0
+ %or.cond = or i1 %tobool, %tobool1
+ br i1 %or.cond, label %if.end3, label %if.then2
+
+if.then2: ; preds = %entry
+ %0 = load i64, i64* %B, align 4
+ tail call void @foo(i64 %A, i64 %0)
+ br label %if.end3
+
+if.end3: ; preds = %entry, %if.then2
+ ret void
+}
+
+; Make sure we use the W variant when log2(mask) is < 32.
+; CHECK-LABEL: test3
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB2_3}}
+; CHECK: tbz w[[REG1:[0-9]+]], #28, {{.LBB2_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
+; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
+define void @test3(i64 %A, i64 %B) {
+entry:
+ %shift = shl i64 1, 28
+ %and = and i64 %A, %shift
+ %notlhs = icmp eq i64 %and, 0
+ %and.1 = and i64 %B, 8
+ %0 = icmp eq i64 %and.1, 0
+ %1 = or i1 %0, %notlhs
+ br i1 %1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %entry
+ tail call void @foo(i64 %A, i64 %B)
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %entry
+ ret void
+}
+
+; CHECK-LABEL: test4
+; CHECK: tbz {{w[0-9]}}, #3, {{.LBB3_3}}
+; CHECK: tbz [[REG1:x[0-9]+]], #35, {{.LBB3_3}}
+; CHECK-NOT: and [[REG2:x[0-9]+]], x[[REG1]]
+; CHECK-NOT: cbz [[REG2]], {{.LBB2_3}}
+define void @test4(i64 %A, i64 %B) {
+entry:
+ %shift = shl i64 1, 35
+ %and = and i64 %A, %shift
+ %notlhs = icmp eq i64 %and, 0
+ %and.1 = and i64 %B, 8
+ %0 = icmp eq i64 %and.1, 0
+ %1 = or i1 %0, %notlhs
+ br i1 %1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %entry
+ tail call void @foo(i64 %A, i64 %B)
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %entry
+ ret void
+}
+
+
+declare void @foo(i64, i64)
diff --git a/test/CodeGen/AArch64/aarch64-tryBitfieldInsertOpFromOr-crash.ll b/test/CodeGen/AArch64/aarch64-tryBitfieldInsertOpFromOr-crash.ll
new file mode 100644
index 000000000000..3c986ba2e513
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-tryBitfieldInsertOpFromOr-crash.ll
@@ -0,0 +1,36 @@
+; RUN: llc <%s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; Function Attrs: noreturn nounwind
+define void @foo(i32* %d) {
+entry:
+ %0 = ptrtoint i32* %d to i64
+ %1 = and i64 %0, -36028797018963969
+ %2 = inttoptr i64 %1 to i32*
+ %arrayidx5 = getelementptr inbounds i32, i32* %2, i64 1
+ %arrayidx6 = getelementptr inbounds i32, i32* %2, i64 2
+ %arrayidx7 = getelementptr inbounds i32, i32* %2, i64 3
+ br label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ %B.0 = phi i32* [ %d, %entry ], [ %12, %for.cond ]
+ %3 = ptrtoint i32* %B.0 to i64
+ %4 = and i64 %3, -36028797018963969
+ %5 = inttoptr i64 %4 to i32*
+ %6 = load i32, i32* %5, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %5, i64 1
+ %7 = load i32, i32* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %5, i64 2
+ %8 = load i32, i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 3
+ %9 = load i32, i32* %arrayidx3, align 4
+ store i32 %6, i32* %2, align 4
+ store i32 %7, i32* %arrayidx5, align 4
+ store i32 %8, i32* %arrayidx6, align 4
+ store i32 %9, i32* %arrayidx7, align 4
+ %10 = ptrtoint i32* %arrayidx1 to i64
+ %11 = or i64 %10, 36028797018963968
+ %12 = inttoptr i64 %11 to i32*
+ br label %for.cond
+}
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
index d6350a6db0ee..c0235cd5d9ef 100644
--- a/test/CodeGen/AArch64/addsub.ll
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -5,6 +5,7 @@
; loads and stores.
@var_i32 = global i32 42
+@var2_i32 = global i32 43
@var_i64 = global i64 0
; Add pure 12-bit immediates:
@@ -106,6 +107,7 @@ define void @sub_med() {
define void @testing() {
; CHECK-LABEL: testing:
%val = load i32, i32* @var_i32
+ %val2 = load i32, i32* @var2_i32
; CHECK: cmp {{w[0-9]+}}, #4095
; CHECK: b.ne [[RET:.?LBB[0-9]+_[0-9]+]]
@@ -117,7 +119,7 @@ test2:
; CHECK: b.lo [[RET]]
%newval2 = add i32 %val, 1
store i32 %newval2, i32* @var_i32
- %cmp_pos_big = icmp ult i32 %val, 14610432
+ %cmp_pos_big = icmp ult i32 %val2, 14610432
br i1 %cmp_pos_big, label %ret, label %test3
test3:
@@ -133,7 +135,7 @@ test4:
; CHECK: b.gt [[RET]]
%newval4 = add i32 %val, 3
store i32 %newval4, i32* @var_i32
- %cmp_pos_sgt = icmp sgt i32 %val, 321
+ %cmp_pos_sgt = icmp sgt i32 %val2, 321
br i1 %cmp_pos_sgt, label %ret, label %test5
test5:
@@ -141,7 +143,7 @@ test5:
; CHECK: b.gt [[RET]]
%newval5 = add i32 %val, 4
store i32 %newval5, i32* @var_i32
- %cmp_neg_uge = icmp sgt i32 %val, -444
+ %cmp_neg_uge = icmp sgt i32 %val2, -444
br i1 %cmp_neg_uge, label %ret, label %test6
test6:
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
index 45754377b2d9..ab7a631dc248 100644
--- a/test/CodeGen/AArch64/alloca.ll
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
declare void @use_addr(i8*)
@@ -113,14 +114,21 @@ define void @test_variadic_alloca(i64 %n, ...) {
define void @test_alloca_large_frame(i64 %n) {
; CHECK-LABEL: test_alloca_large_frame:
+; CHECK-MACHO-LABEL: test_alloca_large_frame:
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: sub sp, sp, #1953, lsl #12
; CHECK: sub sp, sp, #512
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; CHECK-MACHO: sub sp, sp, #1953, lsl #12
+; CHECK-MACHO: sub sp, sp, #512
+
%addr1 = alloca i8, i64 %n
%addr2 = alloca i64, i64 1000000
@@ -130,7 +138,11 @@ define void @test_alloca_large_frame(i64 %n) {
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
+
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
}
declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index a66ea0df2e98..caafde0a1bb2 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
@@ -18,22 +18,21 @@ if.else295: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-!llvm.dbg.gv = !{!0}
-!llvm.dbg.sp = !{!1, !7, !10, !11, !12}
+!llvm.dbg.cu = !{!0}
!0 = !DIGlobalVariable(name: "vsplive", line: 617, isLocal: true, isDefinition: true, scope: !1, file: !2, type: !6)
-!1 = distinct !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!1 = distinct !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !4)
!2 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
-!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: FullDebug, file: !20, enums: !21, retainedTypes: !21, globals: !{!0})
!4 = !DISubroutineType(types: !5)
!5 = !{!6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!7 = distinct !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
+!7 = distinct !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !8)
!8 = !DISubroutineType(types: !9)
!9 = !{null}
-!10 = distinct !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
-!11 = distinct !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
-!12 = distinct !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
+!10 = distinct !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !4)
+!11 = distinct !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !4)
+!12 = distinct !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, file: !20, scope: !2, type: !8)
!13 = !DILocation(line: 653, column: 5, scope: !14)
!14 = distinct !DILexicalBlock(line: 652, column: 35, file: !20, scope: !15)
!15 = distinct !DILexicalBlock(line: 616, column: 1, file: !20, scope: !1)
diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
index 8784abdadfab..6d8c639adb95 100644
--- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll
index e77952e4b8a1..74b6ae16142e 100644
--- a/test/CodeGen/AArch64/arm64-aapcs-be.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll
@@ -32,7 +32,8 @@ define float @test_block_addr([8 x float], [1 x float] %in) {
define void @test_block_addr_callee() {
; CHECK-LABEL: test_block_addr_callee:
-; CHECK: str {{[a-z0-9]+}}, [sp, #-16]!
+; CHECK: sub sp, sp, #32
+; CHECK: str {{[a-z0-9]+}}, [sp, #16]
; CHECK: bl test_block_addr
%val = insertvalue [1 x float] undef, float 0.0, 0
call float @test_block_addr([8 x float] undef, [1 x float] %val)
diff --git a/test/CodeGen/AArch64/arm64-abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 03414b56144c..c92703651385 100644
--- a/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -125,7 +125,7 @@ entry:
define void @bar(i32 %x, <4 x i32> %y) nounwind {
entry:
; CHECK-LABEL: bar:
-; CHECK: str {{q[0-9]+}}, [sp, #16]
+; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #16]
; CHECK: str {{x[0-9]+}}, [sp]
%x.addr = alloca i32, align 4
%y.addr = alloca <4 x i32>, align 16
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index 36a682242aaa..fb52b1d99fc9 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -130,7 +130,7 @@ entry:
; CHECK-LABEL: test3
; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
; FAST-LABEL: test3
-; FAST: sub sp, sp, #32
+; FAST: sub sp, sp, #48
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
%0 = load <2 x i32>, <2 x i32>* %in, align 8
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index dc9884f12f57..e76adb4abc02 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
+; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s
target triple = "arm64-apple-darwin"
; rdar://12648441
@@ -74,7 +74,7 @@ define i32 @caller38_stack() #1 {
entry:
; CHECK-LABEL: caller38_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
%0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
%1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
@@ -128,7 +128,7 @@ entry:
; CHECK-LABEL: caller39_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
%0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
%1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
@@ -184,7 +184,7 @@ entry:
; CHECK-LABEL: caller40_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
%0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
%1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
@@ -238,7 +238,7 @@ entry:
; CHECK-LABEL: caller41_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
%0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
%1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
@@ -291,7 +291,7 @@ entry:
; Space for s2 is allocated at sp
; FAST-LABEL: caller42
-; FAST: sub sp, sp, #96
+; FAST: sub sp, sp, #112
; Space for s1 is allocated at fp-24 = sp+72
; Space for s2 is allocated at sp+48
; FAST: sub x[[A:[0-9]+]], x29, #24
@@ -317,8 +317,8 @@ declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
define i32 @caller42_stack() #3 {
entry:
; CHECK-LABEL: caller42_stack
-; CHECK: mov x29, sp
-; CHECK: sub sp, sp, #96
+; CHECK: sub sp, sp, #112
+; CHECK: add x29, sp, #96
; CHECK: stur {{x[0-9]+}}, [x29, #-16]
; CHECK: stur {{q[0-9]+}}, [x29, #-32]
; CHECK: str {{x[0-9]+}}, [sp, #48]
@@ -330,7 +330,7 @@ entry:
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
; FAST-LABEL: caller42_stack
@@ -399,7 +399,7 @@ entry:
; Space for s2 is allocated at sp
; FAST-LABEL: caller43
-; FAST: mov x29, sp
+; FAST: add x29, sp, #64
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp
; FAST: add x1, sp, #32
@@ -429,8 +429,8 @@ declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
define i32 @caller43_stack() #3 {
entry:
; CHECK-LABEL: caller43_stack
-; CHECK: mov x29, sp
-; CHECK: sub sp, sp, #96
+; CHECK: sub sp, sp, #112
+; CHECK: add x29, sp, #96
; CHECK: stur {{q[0-9]+}}, [x29, #-16]
; CHECK: stur {{q[0-9]+}}, [x29, #-32]
; CHECK: str {{q[0-9]+}}, [sp, #48]
@@ -442,11 +442,11 @@ entry:
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
; FAST-LABEL: caller43_stack
-; FAST: sub sp, sp, #96
+; FAST: sub sp, sp, #112
; Space for s1 is allocated at fp-32 = sp+64
; Space for s2 is allocated at sp+32
; FAST: sub x[[A:[0-9]+]], x29, #32
@@ -508,7 +508,7 @@ entry:
; "i64 %0" should be in register x7.
; "i32 8" should be on stack at [sp].
; CHECK: ldr x7, [{{x[0-9]+}}]
-; CHECK: str {{w[0-9]+}}, [sp, #-16]!
+; CHECK: str {{w[0-9]+}}, [sp]
; FAST-LABEL: i64_split
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll
index c22d0312b24d..0e651a910d7b 100644
--- a/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -82,7 +82,7 @@ define void @t7(i64 %a) {
define void @t8(i64 %a) {
; CHECK-LABEL: t8:
-; CHECK: movn [[REG:x[0-9]+]], #0x1235
+; CHECK: mov [[REG:x[0-9]+]], #-4662
; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
%1 = sub i64 %a, 4662 ;-4662 is 0xffffffffffffedca
%2 = inttoptr i64 %1 to i64*
@@ -92,7 +92,7 @@ define void @t8(i64 %a) {
define void @t9(i64 %a) {
; CHECK-LABEL: t9:
-; CHECK: movn [[REG:x[0-9]+]], #0x1235, lsl #16
+; CHECK: mov [[REG:x[0-9]+]], #-305463297
; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
%1 = add i64 -305463297, %a ;-305463297 is 0xffffffffedcaffff
%2 = inttoptr i64 %1 to i64*
@@ -102,7 +102,7 @@ define void @t9(i64 %a) {
define void @t10(i64 %a) {
; CHECK-LABEL: t10:
-; CHECK: movz [[REG:x[0-9]+]], #0x123, lsl #48
+; CHECK: mov [[REG:x[0-9]+]], #81909218222800896
; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
%1 = add i64 %a, 81909218222800896 ;0x123000000000000
%2 = inttoptr i64 %1 to i64*
@@ -112,8 +112,8 @@ define void @t10(i64 %a) {
define void @t11(i64 %a) {
; CHECK-LABEL: t11:
-; CHECK: movz w[[NUM:[0-9]+]], #0x123, lsl #16
-; CHECK: movk w[[NUM:[0-9]+]], #0x4567
+; CHECK: mov w[[NUM:[0-9]+]], #19070976
+; CHECK: movk w[[NUM:[0-9]+]], #17767
; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
%1 = add i64 %a, 19088743 ;0x1234567
%2 = inttoptr i64 %1 to i64*
diff --git a/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
index bf2d2cfa6066..71bf2039eaa1 100644
--- a/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
+++ b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false -enable-post-misched=false | FileCheck %s
; rdar://12713765
; Make sure we are not creating stack objects that are assumed to be 64-byte
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index 44c24c51f0df..d7188f31c567 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -190,7 +190,7 @@ define void @atomic_store_seq_cst(i128 %in, i128* %p) {
; CHECK-LABEL: atomic_store_seq_cst:
; CHECK-NOT: dmb
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp xzr, xzr, [x2]
+; CHECK: ldaxp xzr, [[IGNORED:x[0-9]+]], [x2]
; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
; CHECK: cbnz [[SUCCESS]], [[LABEL]]
; CHECK-NOT: dmb
@@ -202,7 +202,7 @@ define void @atomic_store_release(i128 %in, i128* %p) {
; CHECK-LABEL: atomic_store_release:
; CHECK-NOT: dmb
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp xzr, xzr, [x2]
+; CHECK: ldxp xzr, [[IGNORED:x[0-9]+]], [x2]
; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
; CHECK: cbnz [[SUCCESS]], [[LABEL]]
; CHECK-NOT: dmb
@@ -214,7 +214,7 @@ define void @atomic_store_relaxed(i128 %in, i128* %p) {
; CHECK-LABEL: atomic_store_relaxed:
; CHECK-NOT: dmb
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp xzr, xzr, [x2]
+; CHECK: ldxp xzr, [[IGNORED:x[0-9]+]], [x2]
; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
; CHECK: cbnz [[SUCCESS]], [[LABEL]]
; CHECK-NOT: dmb
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index 5d8d60de5fc5..fef137b1023f 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -103,7 +103,7 @@ define i64 @fetch_and_nand_64(i64* %p) #0 {
define i32 @fetch_and_or(i32* %p) #0 {
; CHECK-LABEL: fetch_and_or:
-; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5
+; CHECK: mov [[OLDVAL_REG:w[0-9]+]], #5
; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0]
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
diff --git a/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 5dca92941211..402e16ccdb21 100644
--- a/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -41,7 +41,7 @@ define i32 @bar(i64 %cav1.coerce) nounwind {
define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
; CHECK-LABEL: fct1:
-; CHECK: ubfx
+; CHECK: ubfx x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-NOT: and
; CHECK: ret
@@ -348,8 +348,8 @@ entry:
; CHECK-LABEL: fct16:
; CHECK: ldr [[REG1:w[0-9]+]],
; Create the constant
-; CHECK: movz [[REGCST:w[0-9]+]], #0x1a, lsl #16
-; CHECK: movk [[REGCST]], #0x8160
+; CHECK: mov [[REGCST:w[0-9]+]], #1703936
+; CHECK: movk [[REGCST]], #33120
; Do the masking
; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
; CHECK-NEXT: bfxil [[REG2]], w1, #16, #3
@@ -377,8 +377,8 @@ entry:
; CHECK-LABEL: fct17:
; CHECK: ldr [[REG1:x[0-9]+]],
; Create the constant
-; CHECK: movz w[[REGCST:[0-9]+]], #0x1a, lsl #16
-; CHECK: movk w[[REGCST]], #0x8160
+; CHECK: mov w[[REGCST:[0-9]+]], #1703936
+; CHECK: movk w[[REGCST]], #33120
; Do the masking
; CHECK: and [[REG2:x[0-9]+]], [[REG1]], x[[REGCST]]
; CHECK-NEXT: bfxil [[REG2]], x1, #16, #3
diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll
index d0f6db080551..1a6c3687dcb0 100644
--- a/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -36,7 +36,7 @@ define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
; CHECK-LABEL: build_all_zero:
-; CHECK: movz w[[GREG:[0-9]+]], #0xae80
+; CHECK: mov w[[GREG:[0-9]+]], #44672
; CHECK-NEXT: fmov s[[FREG:[0-9]+]], w[[GREG]]
; CHECK-NEXT: mul.8h v0, v0, v[[FREG]]
%b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
@@ -56,4 +56,4 @@ define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
%vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>
%shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle.i
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/arm64-builtins-linux.ll b/test/CodeGen/AArch64/arm64-builtins-linux.ll
index 34fa1b471561..6caf3a2a18ef 100644
--- a/test/CodeGen/AArch64/arm64-builtins-linux.ll
+++ b/test/CodeGen/AArch64/arm64-builtins-linux.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
; Function Attrs: nounwind readnone
-declare i8* @llvm.aarch64.thread.pointer() #1
+declare i8* @llvm.thread.pointer() #1
define i8* @thread_pointer() {
; CHECK: thread_pointer:
; CHECK: mrs {{x[0-9]+}}, TPIDR_EL0
- %1 = tail call i8* @llvm.aarch64.thread.pointer()
+ %1 = tail call i8* @llvm.thread.pointer()
ret i8* %1
}
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index 72d3b8331162..748bbcca079f 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -317,24 +317,6 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
ret i64 %sel
}
-; CHECK-LABEL: select_complicated
-define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
-; CHECK: ldr [[REG:d[0-9]+]],
-; CHECK: fcmp d0, d2
-; CHECK-NEXT: fmov d2, #13.00000000
-; CHECK-NEXT: fccmp d1, d2, #4, ne
-; CHECK-NEXT: fccmp d0, d1, #1, ne
-; CHECK-NEXT: fccmp d0, d1, #4, vc
-; CEHCK-NEXT: csel w0, w0, w1, eq
- %1 = fcmp one double %v1, %v2
- %2 = fcmp oeq double %v2, 13.0
- %3 = fcmp oeq double %v1, 42.0
- %or0 = or i1 %2, %3
- %or1 = or i1 %1, %or0
- %sel = select i1 %or1, i16 %a, i16 %b
- ret i16 %sel
-}
-
; CHECK-LABEL: gccbug
define i64 @gccbug(i64 %x0, i64 %x1) {
; CHECK: cmp x0, #2
@@ -443,3 +425,234 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
store volatile i32 %ext, i32* @g
ret i64 %sel
}
+
+; The following is not possible to implement with a single cmp;ccmp;csel
+; sequence.
+; CHECK-LABEL: select_noccmp3
+define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
+ %c0 = icmp slt i32 %v0, 0
+ %c1 = icmp sgt i32 %v0, 13
+ %c2 = icmp slt i32 %v0, 22
+ %c3 = icmp sgt i32 %v0, 44
+ %c4 = icmp eq i32 %v0, 99
+ %c5 = icmp eq i32 %v0, 77
+ %or0 = or i1 %c0, %c1
+ %or1 = or i1 %c2, %c3
+ %and0 = and i1 %or0, %or1
+ %or2 = or i1 %c4, %c5
+ %and1 = and i1 %and0, %or2
+ %sel = select i1 %and1, i32 %v1, i32 %v2
+ ret i32 %sel
+}
+
+; Test the IR CCs that expand to two cond codes.
+
+; CHECK-LABEL: select_and_olt_one:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d2, d3, #4, mi
+; CHECK-NEXT: fccmp d2, d3, #1, ne
+; CHECK-NEXT: csel w0, w0, w1, vc
+; CHECK-NEXT: ret
+define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt double %v0, %v1
+ %c1 = fcmp one double %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_and_one_olt:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d0, d1, #1, ne
+; CHECK-NEXT: fccmp d2, d3, #0, vc
+; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: ret
+define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp one double %v0, %v1
+ %c1 = fcmp olt double %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_and_olt_ueq:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d2, d3, #0, mi
+; CHECK-NEXT: fccmp d2, d3, #8, le
+; CHECK-NEXT: csel w0, w0, w1, pl
+; CHECK-NEXT: ret
+define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt double %v0, %v1
+ %c1 = fcmp ueq double %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_and_ueq_olt:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d0, d1, #8, le
+; CHECK-NEXT: fccmp d2, d3, #0, pl
+; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: ret
+define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp ueq double %v0, %v1
+ %c1 = fcmp olt double %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_or_olt_one:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d2, d3, #0, pl
+; CHECK-NEXT: fccmp d2, d3, #8, le
+; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: ret
+define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt double %v0, %v1
+ %c1 = fcmp one double %v2, %v3
+ %cr = or i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_or_one_olt:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d0, d1, #1, ne
+; CHECK-NEXT: fccmp d2, d3, #8, vs
+; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: ret
+define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp one double %v0, %v1
+ %c1 = fcmp olt double %v2, %v3
+ %cr = or i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_or_olt_ueq:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d2, d3, #4, pl
+; CHECK-NEXT: fccmp d2, d3, #1, ne
+; CHECK-NEXT: csel w0, w0, w1, vs
+; CHECK-NEXT: ret
+define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt double %v0, %v1
+ %c1 = fcmp ueq double %v2, %v3
+ %cr = or i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_or_ueq_olt:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d0, d1, #8, le
+; CHECK-NEXT: fccmp d2, d3, #8, mi
+; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: ret
+define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp ueq double %v0, %v1
+ %c1 = fcmp olt double %v2, %v3
+ %cr = or i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_or_olt_ogt_ueq:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d2, d3, #0, pl
+; CHECK-NEXT: fccmp d4, d5, #4, le
+; CHECK-NEXT: fccmp d4, d5, #1, ne
+; CHECK-NEXT: csel w0, w0, w1, vs
+; CHECK-NEXT: ret
+define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt double %v0, %v1
+ %c1 = fcmp ogt double %v2, %v3
+ %c2 = fcmp ueq double %v4, %v5
+ %c3 = or i1 %c1, %c0
+ %cr = or i1 %c2, %c3
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_or_olt_ueq_ogt:
+; CHECK-LABEL: ; BB#0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: fccmp d2, d3, #4, pl
+; CHECK-NEXT: fccmp d2, d3, #1, ne
+; CHECK-NEXT: fccmp d4, d5, #0, vc
+; CHECK-NEXT: csel w0, w0, w1, gt
+; CHECK-NEXT: ret
+define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt double %v0, %v1
+ %c1 = fcmp ueq double %v2, %v3
+ %c2 = fcmp ogt double %v4, %v5
+ %c3 = or i1 %c1, %c0
+ %cr = or i1 %c2, %c3
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; Verify that we correctly promote f16.
+
+; CHECK-LABEL: half_select_and_olt_oge:
+; CHECK-LABEL: ; BB#0:
+; CHECK-DAG: fcvt [[S0:s[0-9]+]], h0
+; CHECK-DAG: fcvt [[S1:s[0-9]+]], h1
+; CHECK-NEXT: fcmp [[S0]], [[S1]]
+; CHECK-DAG: fcvt [[S2:s[0-9]+]], h2
+; CHECK-DAG: fcvt [[S3:s[0-9]+]], h3
+; CHECK-NEXT: fccmp [[S2]], [[S3]], #8, mi
+; CHECK-NEXT: csel w0, w0, w1, ge
+; CHECK-NEXT: ret
+define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt half %v0, %v1
+ %c1 = fcmp oge half %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; CHECK-LABEL: half_select_and_olt_one:
+; CHECK-LABEL: ; BB#0:
+; CHECK-DAG: fcvt [[S0:s[0-9]+]], h0
+; CHECK-DAG: fcvt [[S1:s[0-9]+]], h1
+; CHECK-NEXT: fcmp [[S0]], [[S1]]
+; CHECK-DAG: fcvt [[S2:s[0-9]+]], h2
+; CHECK-DAG: fcvt [[S3:s[0-9]+]], h3
+; CHECK-NEXT: fccmp [[S2]], [[S3]], #4, mi
+; CHECK-NEXT: fccmp [[S2]], [[S3]], #1, ne
+; CHECK-NEXT: csel w0, w0, w1, vc
+; CHECK-NEXT: ret
+define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt half %v0, %v1
+ %c1 = fcmp one half %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; Also verify that we don't try to generate f128 FCCMPs, using RT calls instead.
+
+; CHECK-LABEL: f128_select_and_olt_oge:
+; CHECK: bl ___lttf2
+; CHECK: bl ___getf2
+define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) #0 {
+ %c0 = fcmp olt fp128 %v0, %v1
+ %c1 = fcmp oge fp128 %v2, %v3
+ %cr = and i1 %c1, %c0
+ %sel = select i1 %cr, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll
index 59147d401a30..3fc0d45f065c 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -613,6 +613,7 @@ define <1 x i8> @getL() {
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF]
+; CHECK-NEXT: ; kill
; Ultimately we should generate str b0, but right now, we match the vector
; variant which does not allow to fold the immediate into the store.
; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]]
@@ -654,4 +655,25 @@ define void @uninterestingSub(i8* nocapture %row) #0 {
ret void
}
+@.str.89 = external unnamed_addr constant [12 x i8], align 1
+@.str.90 = external unnamed_addr constant [5 x i8], align 1
+; CHECK-LABEL: test_r274582
+define void @test_r274582() {
+entry:
+ br i1 undef, label %if.then.i, label %if.end.i
+if.then.i:
+ ret void
+if.end.i:
+; CHECK: .loh AdrpAdrp Lloh91, Lloh93
+; CHECK: .loh AdrpLdr Lloh91, Lloh92
+; CHECK: .loh AdrpLdrGot Lloh93, Lloh95
+; CHECK: .loh AdrpLdrGot Lloh94, Lloh96
+ %mul.i.i.i = fmul double undef, 1.000000e-06
+ %add.i.i.i = fadd double undef, %mul.i.i.i
+ %sub.i.i = fsub double %add.i.i.i, undef
+ call void (i8*, ...) @callee(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.89, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.90, i64 0, i64 0), double %sub.i.i)
+ unreachable
+}
+declare void @callee(i8* nocapture readonly, ...)
+
attributes #0 = { "target-cpu"="cyclone" }
diff --git a/test/CodeGen/AArch64/arm64-const-addr.ll b/test/CodeGen/AArch64/arm64-const-addr.ll
index ffc153344d3a..e55db2904489 100644
--- a/test/CodeGen/AArch64/arm64-const-addr.ll
+++ b/test/CodeGen/AArch64/arm64-const-addr.ll
@@ -5,8 +5,8 @@
; Test if the constant base address gets only materialized once.
define i32 @test1() nounwind {
; CHECK-LABEL: test1
-; CHECK: movz w8, #0x40f, lsl #16
-; CHECK-NEXT: movk w8, #0xc000
+; CHECK: mov w8, #68091904
+; CHECK-NEXT: movk w8, #49152
; CHECK-NEXT: ldp w9, w10, [x8, #4]
; CHECK: ldr w8, [x8, #12]
%at = inttoptr i64 68141056 to %T*
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index b8da39910312..ed061122f311 100644
--- a/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -23,8 +23,8 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
-; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s
-; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s
+; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA2]].4s
+; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA0]].4s
; CHECK: xtn v0.8b, v[[TMP1]].8h
%tmp1 = load <8 x double>, <8 x double>* %ptr
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
diff --git a/test/CodeGen/AArch64/arm64-csldst-mmo.ll b/test/CodeGen/AArch64/arm64-csldst-mmo.ll
new file mode 100644
index 000000000000..0b8f7a19b484
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-csldst-mmo.ll
@@ -0,0 +1,25 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched=0 -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
+
+@G = external global [0 x i32], align 4
+
+; Check that MMOs are added to epilogue callee-save restore loads so
+; that the store to G is not considered dependent on the callee-save
+; loads.
+;
+; CHECK: Before post-MI-sched:
+; CHECK-LABEL: # Machine code for function test1:
+; CHECK: SU(2): STRWui %WZR
+; CHECK: SU(3): %X21<def>, %X20<def> = LDPXi %SP
+; CHECK: Predecessors:
+; CHECK-NEXT: out SU(0)
+; CHECK-NEXT: out SU(0)
+; CHECK-NEXT: ch SU(0)
+; CHECK-NEXT: Successors:
+define void @test1() {
+entry:
+ tail call void asm sideeffect "nop", "~{x20},~{x21},~{x22},~{x23}"() nounwind
+ store i32 0, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @G, i64 0, i64 0), align 4
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-detect-vec-redux.ll b/test/CodeGen/AArch64/arm64-detect-vec-redux.ll
new file mode 100644
index 000000000000..68130f1c9f88
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-detect-vec-redux.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=arm64-darwin-unknown < %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; Function Attrs: nounwind readnone
+define i32 @dotests_56() #0 {
+entry:
+ %vqshrn_n4 = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> zeroinitializer, i32 19)
+ %shuffle.i109 = shufflevector <2 x i32> %vqshrn_n4, <2 x i32> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
+ %neg = xor <4 x i32> %shuffle.i109, <i32 undef, i32 -1, i32 undef, i32 undef>
+ %shuffle = shufflevector <4 x i32> %neg, <4 x i32> undef, <2 x i32> <i32 1, i32 undef>
+ %mul = mul <2 x i32> %shuffle, <i32 add (i32 extractelement (<2 x i32> bitcast (<1 x i64> <i64 -4264345899313889281> to <2 x i32>), i32 0), i32 sub (i32 0, i32 extractelement (<2 x i32> bitcast (<1 x i64> <i64 -9223231295071453185> to <2 x i32>), i32 0))), i32 undef>
+ %shuffle27 = shufflevector <2 x i32> %mul, <2 x i32> undef, <4 x i32> zeroinitializer
+ %0 = bitcast <4 x i32> %shuffle27 to <8 x i16>
+ %shuffle.i108 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ %vqshrn_n38 = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %shuffle.i108, i32 1)
+ %shuffle.i = shufflevector <8 x i8> %vqshrn_n38, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = bitcast <16 x i8> %shuffle.i to <2 x i64>
+ %vpaddq_v2.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> undef, <2 x i64> %1) #2
+ %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> undef, <2 x i32> undef) #2
+ %vqdmlal_v3.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %vpaddq_v2.i, <2 x i64> %vqdmlal2.i) #2
+ %vmovn.i = trunc <2 x i64> %vqdmlal_v3.i to <2 x i32>
+ %vqdmulh_v2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %vmovn.i, <2 x i32> zeroinitializer) #2
+ %2 = bitcast <2 x i32> %vqdmulh_v2.i to <1 x i64>
+ %vget_lane = extractelement <1 x i64> %2, i32 0
+ %cmp = icmp ne i64 %vget_lane, -7395147708962464393
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) #1
+
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-extern-weak.ll b/test/CodeGen/AArch64/arm64-extern-weak.ll
index 020c07c739d9..f00efbcea780 100644
--- a/test/CodeGen/AArch64/arm64-extern-weak.ll
+++ b/test/CodeGen/AArch64/arm64-extern-weak.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=static -o - < %s | FileCheck --check-prefix=CHECK-STATIC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=static -o - < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s
declare extern_weak i32 @var()
@@ -13,11 +13,6 @@ define i32()* @foo() {
; CHECK: adrp x[[VAR:[0-9]+]], :got:var
; CHECK: ldr x0, [x[[VAR]], :got_lo12:var]
-; CHECK-STATIC: .LCPI0_0:
-; CHECK-STATIC-NEXT: .xword var
-; CHECK-STATIC: adrp x[[VAR:[0-9]+]], .LCPI0_0
-; CHECK-STATIC: ldr x0, [x[[VAR]], :lo12:.LCPI0_0]
-
; In the large model, the usual relocations are absolute and can
; materialise 0.
; CHECK-LARGE: movz x0, #:abs_g3:var
@@ -36,11 +31,6 @@ define i32* @bar() {
; CHECK: add x0, [[ARR_VAR]], #20
ret i32* %addr
-; CHECK-STATIC: .LCPI1_0:
-; CHECK-STATIC-NEXT: .xword arr_var
-; CHECK-STATIC: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, :lo12:.LCPI1_0]
-; CHECK-STATIC: add x0, [[BASE]], #20
-
; In the large model, the usual relocations are absolute and can
; materialise 0.
; CHECK-LARGE: movz [[ARR_VAR:x[0-9]+]], #:abs_g3:arr_var
@@ -56,9 +46,6 @@ define i32* @wibble() {
; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
; CHECK: add x0, [[BASE]], :lo12:defined_weak_var
-; CHECK-STATIC: adrp [[BASE:x[0-9]+]], defined_weak_var
-; CHECK-STATIC: add x0, [[BASE]], :lo12:defined_weak_var
-
; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
diff --git a/test/CodeGen/AArch64/arm64-extract.ll b/test/CodeGen/AArch64/arm64-extract.ll
index 01984662d23a..6e07c4ce4ccb 100644
--- a/test/CodeGen/AArch64/arm64-extract.ll
+++ b/test/CodeGen/AArch64/arm64-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc -aarch64-extr-generation=true -verify-machineinstrs < %s \
+; RUN: llc -verify-machineinstrs < %s \
; RUN: -march=arm64 | FileCheck %s
define i64 @ror_i64(i64 %in) {
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
index e4dc948c4603..9dae7a6f5b69 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
@@ -6,10 +6,10 @@
; Load an address with an offset larget then LDR imm can handle
define i32 @foo() nounwind {
entry:
-; CHECK: @foo
+; CHECK-LABEL: @foo
; CHECK: adrp x[[REG:[0-9]+]], _sortlist@GOTPAGE
; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist@GOTPAGEOFF]
-; CHECK: movz x[[REG2:[0-9]+]], #0x4e20
+; CHECK: mov x[[REG2:[0-9]+]], #20000
; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
; CHECK: ldr w0, [x[[REG3]]]
; CHECK: ret
@@ -19,10 +19,10 @@ entry:
define i64 @foo2() nounwind {
entry:
-; CHECK: @foo2
+; CHECK-LABEL: @foo2
; CHECK: adrp x[[REG:[0-9]+]], _sortlist2@GOTPAGE
; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist2@GOTPAGEOFF]
-; CHECK: movz x[[REG2:[0-9]+]], #0x9c40
+; CHECK: mov x[[REG2:[0-9]+]], #40000
; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
; CHECK: ldr x0, [x[[REG3]]]
; CHECK: ret
@@ -36,10 +36,10 @@ entry:
define signext i8 @foo3() nounwind ssp {
entry:
-; CHECK: @foo3
-; CHECK: movz x[[REG:[0-9]+]], #0xb3a, lsl #32
-; CHECK: movk x[[REG]], #0x73ce, lsl #16
-; CHECK: movk x[[REG]], #0x2ff2
+; CHECK-LABEL: @foo3
+; CHECK: mov x[[REG:[0-9]+]], #12343736008704
+; CHECK: movk x[[REG]], #29646, lsl #16
+; CHECK: movk x[[REG]], #12274
%0 = load i8*, i8** @pd2, align 8
%arrayidx = getelementptr inbounds i8, i8* %0, i64 12345678901234
%1 = load i8, i8* %arrayidx, align 1
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
index a506607a0a5d..bdc24aea2144 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
@@ -1,5 +1,5 @@
; This test should cause the TargetMaterializeAlloca to be invoked
-; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -disable-fp-elim < %s | FileCheck %s
%struct.S1Ty = type { i64 }
%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty }
@@ -14,7 +14,7 @@ entry:
define void @main() nounwind {
entry:
; CHECK: main
-; CHECK: mov x29, sp
+; CHECK: add x29, sp, #16
; CHECK: mov [[REG:x[0-9]+]], sp
; CHECK-NEXT: add x0, [[REG]], #8
%E = alloca %struct.S2Ty, align 4
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
index d6957f9191e2..59c4e38e5467 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-call.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -1,6 +1,6 @@
-; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
-; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
define void @call0() nounwind {
entry:
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
index ab29824ccb60..85d000b8606b 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
@@ -18,8 +18,8 @@ entry:
; CHECK: @Rand
; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE
; CHECK: ldr [[REG2:x[0-9]+]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}}
-; CHECK: movz [[REG3:x[0-9]+]], #0x3619
-; CHECK: movz [[REG4:x[0-9]+]], #0x51d
+; CHECK: mov [[REG3:x[0-9]+]], #13849
+; CHECK: mov [[REG4:x[0-9]+]], #1309
; CHECK: ldr [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}}
; CHECK: mul [[REG6:x[0-9]+]], [[REG5]], [[REG4]]
; CHECK: add [[REG7:x[0-9]+]], [[REG6]], [[REG3]]
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
index bb2889eaf4be..a8f30ad4777d 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -8,7 +8,7 @@ define void @t1() {
; ARM64: adrp x8, _message@PAGE
; ARM64: add x0, x8, _message@PAGEOFF
; ARM64: mov w9, wzr
-; ARM64: movz x2, #0x50
+; ARM64: mov x2, #80
; ARM64: uxtb w1, w9
; ARM64: bl _memset
call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
@@ -23,7 +23,7 @@ define void @t2() {
; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
; ARM64: adrp x8, _message@PAGE
; ARM64: add x1, x8, _message@PAGEOFF
-; ARM64: movz x2, #0x50
+; ARM64: mov x2, #80
; ARM64: bl _memcpy
call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
ret void
@@ -37,7 +37,7 @@ define void @t3() {
; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
; ARM64: adrp x8, _message@PAGE
; ARM64: add x1, x8, _message@PAGEOFF
-; ARM64: movz x2, #0x14
+; ARM64: mov x2, #20
; ARM64: bl _memmove
call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
ret void
@@ -137,7 +137,7 @@ define void @t8() {
define void @test_distant_memcpy(i8* %dst) {
; ARM64-LABEL: test_distant_memcpy:
; ARM64: mov [[ARRAY:x[0-9]+]], sp
-; ARM64: movz [[OFFSET:x[0-9]+]], #0x1f40
+; ARM64: mov [[OFFSET:x[0-9]+]], #8000
; ARM64: add x[[ADDR:[0-9]+]], [[ARRAY]], [[OFFSET]]
; ARM64: ldrb [[BYTE:w[0-9]+]], [x[[ADDR]]]
; ARM64: strb [[BYTE]], [x0]
diff --git a/test/CodeGen/AArch64/arm64-fcopysign.ll b/test/CodeGen/AArch64/arm64-fcopysign.ll
index feffd41f002a..9bcc8eeca219 100644
--- a/test/CodeGen/AArch64/arm64-fcopysign.ll
+++ b/test/CodeGen/AArch64/arm64-fcopysign.ll
@@ -5,7 +5,7 @@
define float @test1(float %x, float %y) nounwind {
entry:
; CHECK-LABEL: test1:
-; CHECK: movi.4s v2, #0x80, lsl #24
+; CHECK: movi.4s v2, #128, lsl #24
; CHECK: bit.16b v0, v1, v2
%0 = tail call float @copysignf(float %x, float %y) nounwind readnone
ret float %0
@@ -37,7 +37,7 @@ define float @test4() nounwind {
entry:
; CHECK-LABEL: test4:
; CHECK: fcvt s0, d0
-; CHECK: movi.4s v[[CONST:[0-9]+]], #0x80, lsl #24
+; CHECK: movi.4s v[[CONST:[0-9]+]], #128, lsl #24
; CHECK: bit.16b v{{[0-9]+}}, v0, v[[CONST]]
%0 = tail call double (...) @bar() nounwind
%1 = fptrunc double %0 to float
diff --git a/test/CodeGen/AArch64/arm64-fma-combines.ll b/test/CodeGen/AArch64/arm64-fma-combines.ll
new file mode 100644
index 000000000000..ab875c06cc62
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fma-combines.ll
@@ -0,0 +1,136 @@
+; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+define void @foo_2d(double* %src) {
+; CHECK-LABEL: %entry
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+ %arrayidx1 = getelementptr inbounds double, double* %src, i64 5
+ %arrayidx2 = getelementptr inbounds double, double* %src, i64 11
+ %tmp = bitcast double* %arrayidx1 to <2 x double>*
+ %tmp1 = load double, double* %arrayidx2, align 8
+ %tmp2 = load double, double* %arrayidx1, align 8
+ %fmul = fmul fast double %tmp1, %tmp1
+ %fmul2 = fmul fast double %tmp2, 0x3F94AFD6A052BF5B
+ %fadd = fadd fast double %fmul, %fmul2
+ br label %for.body
+
+; CHECK-LABEL: %for.body
+; CHECK: fmla.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; CHECK: fmla.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+; CHECK: fmla.d {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}[0]
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds double, double* %src, i64 %indvars.iv.next
+ %tmp3 = load double, double* %arrayidx3, align 8
+ %add = fadd fast double %tmp3, %tmp3
+ %mul = fmul fast double %add, %fadd
+ %e1 = insertelement <2 x double> undef, double %add, i32 0
+ %e2 = insertelement <2 x double> %e1, double %add, i32 1
+ %add2 = fadd fast <2 x double> %e2, <double 3.000000e+00, double -3.000000e+00>
+ %e3 = insertelement <2 x double> undef, double %mul, i32 0
+ %e4 = insertelement <2 x double> %e3, double %mul, i32 1
+ %mul2 = fmul fast <2 x double> %add2,<double 3.000000e+00, double -3.000000e+00>
+ %e5 = insertelement <2 x double> undef, double %add, i32 0
+ %e6 = insertelement <2 x double> %e5, double %add, i32 1
+ %add3 = fadd fast <2 x double> %mul2, <double 3.000000e+00, double -3.000000e+00>
+ %mulx = fmul fast <2 x double> %add2, %e2
+ %addx = fadd fast <2 x double> %mulx, %e4
+ %e7 = insertelement <2 x double> undef, double %mul, i32 0
+ %e8 = insertelement <2 x double> %e7, double %mul, i32 1
+ %e9 = fmul fast <2 x double> %addx, %add3
+ store <2 x double> %e9, <2 x double>* %tmp, align 8
+ %e10 = extractelement <2 x double> %add3, i32 0
+ %mul3 = fmul fast double %mul, %e10
+ %add4 = fadd fast double %mul3, %mul
+ store double %add4, double* %arrayidx2, align 8
+ %exitcond = icmp eq i64 %indvars.iv.next, 25
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+define void @foo_2s(float* %src) {
+entry:
+ %arrayidx1 = getelementptr inbounds float, float* %src, i64 5
+ %arrayidx2 = getelementptr inbounds float, float* %src, i64 11
+ %tmp = bitcast float* %arrayidx1 to <2 x float>*
+ br label %for.body
+
+; CHECK-LABEL: %for.body
+; CHECK: fmla.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; CHECK: fmla.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+; CHECK: fmla.s {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}[0]
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds float, float* %src, i64 %indvars.iv.next
+ %tmp1 = load float, float* %arrayidx3, align 8
+ %add = fadd fast float %tmp1, %tmp1
+ %mul = fmul fast float %add, %add
+ %e1 = insertelement <2 x float> undef, float %add, i32 0
+ %e2 = insertelement <2 x float> %e1, float %add, i32 1
+ %add2 = fadd fast <2 x float> %e2, <float 3.000000e+00, float -3.000000e+00>
+ %e3 = insertelement <2 x float> undef, float %mul, i32 0
+ %e4 = insertelement <2 x float> %e3, float %mul, i32 1
+ %mul2 = fmul fast <2 x float> %add2,<float 3.000000e+00, float -3.000000e+00>
+ %e5 = insertelement <2 x float> undef, float %add, i32 0
+ %e6 = insertelement <2 x float> %e5, float %add, i32 1
+ %add3 = fadd fast <2 x float> %mul2, <float 3.000000e+00, float -3.000000e+00>
+ %mulx = fmul fast <2 x float> %add2, %e2
+ %addx = fadd fast <2 x float> %mulx, %e4
+ %e7 = insertelement <2 x float> undef, float %mul, i32 0
+ %e8 = insertelement <2 x float> %e7, float %mul, i32 1
+ %e9 = fmul fast <2 x float> %addx, %add3
+ store <2 x float> %e9, <2 x float>* %tmp, align 8
+ %e10 = extractelement <2 x float> %add3, i32 0
+ %mul3 = fmul fast float %mul, %e10
+ %add4 = fadd fast float %mul3, %mul
+ store float %add4, float* %arrayidx2, align 8
+ %exitcond = icmp eq i64 %indvars.iv.next, 25
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+define void @foo_4s(float* %src) {
+entry:
+ %arrayidx1 = getelementptr inbounds float, float* %src, i64 5
+ %arrayidx2 = getelementptr inbounds float, float* %src, i64 11
+ %tmp = bitcast float* %arrayidx1 to <4 x float>*
+ br label %for.body
+
+; CHECK-LABEL: %for.body
+; CHECK: fmla.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; CHECK: fmla.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds float, float* %src, i64 %indvars.iv.next
+ %tmp1 = load float, float* %arrayidx3, align 8
+ %add = fadd fast float %tmp1, %tmp1
+ %mul = fmul fast float %add, %add
+ %e1 = insertelement <4 x float> undef, float %add, i32 0
+ %e2 = insertelement <4 x float> %e1, float %add, i32 1
+ %add2 = fadd fast <4 x float> %e2, <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
+ %e3 = insertelement <4 x float> undef, float %mul, i32 0
+ %e4 = insertelement <4 x float> %e3, float %mul, i32 1
+ %mul2 = fmul fast <4 x float> %add2,<float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
+ %e5 = insertelement <4 x float> undef, float %add, i32 0
+ %e6 = insertelement <4 x float> %e5, float %add, i32 1
+ %add3 = fadd fast <4 x float> %mul2, <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
+ %mulx = fmul fast <4 x float> %add2, %e2
+ %addx = fadd fast <4 x float> %mulx, %e4
+ %e7 = insertelement <4 x float> undef, float %mul, i32 0
+ %e8 = insertelement <4 x float> %e7, float %mul, i32 1
+ %e9 = fmul fast <4 x float> %addx, %add3
+ store <4 x float> %e9, <4 x float>* %tmp, align 8
+ %e10 = extractelement <4 x float> %add3, i32 0
+ %mul3 = fmul fast float %mul, %e10
+ store float %mul3, float* %arrayidx2, align 8
+ %exitcond = icmp eq i64 %indvars.iv.next, 25
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fml-combines.ll b/test/CodeGen/AArch64/arm64-fml-combines.ll
new file mode 100644
index 000000000000..840d1dcbf060
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fml-combines.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+define void @foo_2d(double* %src) {
+entry:
+ %arrayidx1 = getelementptr inbounds double, double* %src, i64 5
+ %arrayidx2 = getelementptr inbounds double, double* %src, i64 11
+ %tmp = bitcast double* %arrayidx1 to <2 x double>*
+ br label %for.body
+
+; CHECK-LABEL: %for.body
+; CHECK: fmls.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; CHECK: fmls.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+; CHECK: fmls.d {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}[0]
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = sub nuw nsw i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds double, double* %src, i64 %indvars.iv.next
+ %tmp1 = load double, double* %arrayidx3, align 8
+ %add = fadd fast double %tmp1, %tmp1
+ %mul = fmul fast double %add, %add
+ %e1 = insertelement <2 x double> undef, double %add, i32 0
+ %e2 = insertelement <2 x double> %e1, double %add, i32 1
+ %sub2 = fsub fast <2 x double> %e2, <double 3.000000e+00, double -3.000000e+00>
+ %e3 = insertelement <2 x double> undef, double %mul, i32 0
+ %e4 = insertelement <2 x double> %e3, double %mul, i32 1
+ %mul2 = fmul fast <2 x double> %sub2,<double 3.000000e+00, double -3.000000e+00>
+ %e5 = insertelement <2 x double> undef, double %add, i32 0
+ %e6 = insertelement <2 x double> %e5, double %add, i32 1
+ %sub3 = fsub fast <2 x double> <double 3.000000e+00, double -3.000000e+00>, %mul2
+ %mulx = fmul fast <2 x double> %sub2, %e2
+ %subx = fsub fast <2 x double> %e4, %mulx
+ %e7 = insertelement <2 x double> undef, double %mul, i32 0
+ %e8 = insertelement <2 x double> %e7, double %mul, i32 1
+ %e9 = fmul fast <2 x double> %subx, %sub3
+ store <2 x double> %e9, <2 x double>* %tmp, align 8
+ %e10 = extractelement <2 x double> %sub3, i32 0
+ %mul3 = fmul fast double %mul, %e10
+ %sub4 = fsub fast double %mul, %mul3
+ store double %sub4, double* %arrayidx2, align 8
+ %exitcond = icmp eq i64 %indvars.iv.next, 25
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+define void @foo_2s(float* %src) {
+entry:
+ %arrayidx1 = getelementptr inbounds float, float* %src, i64 5
+ %arrayidx2 = getelementptr inbounds float, float* %src, i64 11
+ %tmp = bitcast float* %arrayidx1 to <2 x float>*
+ br label %for.body
+
+; CHECK-LABEL: %for.body
+; CHECK: fmls.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; CHECK: fmls.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+; CHECK: fmls.s {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}[0]
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds float, float* %src, i64 %indvars.iv.next
+ %tmp1 = load float, float* %arrayidx3, align 8
+ %add = fadd fast float %tmp1, %tmp1
+ %mul = fmul fast float %add, %add
+ %e1 = insertelement <2 x float> undef, float %add, i32 0
+ %e2 = insertelement <2 x float> %e1, float %add, i32 1
+ %add2 = fsub fast <2 x float> %e2, <float 3.000000e+00, float -3.000000e+00>
+ %e3 = insertelement <2 x float> undef, float %mul, i32 0
+ %e4 = insertelement <2 x float> %e3, float %mul, i32 1
+ %mul2 = fmul fast <2 x float> %add2,<float 3.000000e+00, float -3.000000e+00>
+ %e5 = insertelement <2 x float> undef, float %add, i32 0
+ %e6 = insertelement <2 x float> %e5, float %add, i32 1
+ %add3 = fsub fast <2 x float> <float 3.000000e+00, float -3.000000e+00>, %mul2
+ %mulx = fmul fast <2 x float> %add2, %e2
+ %addx = fsub fast <2 x float> %e4, %mulx
+ %e7 = insertelement <2 x float> undef, float %mul, i32 0
+ %e8 = insertelement <2 x float> %e7, float %mul, i32 1
+ %e9 = fmul fast <2 x float> %addx, %add3
+ store <2 x float> %e9, <2 x float>* %tmp, align 8
+ %e10 = extractelement <2 x float> %add3, i32 0
+ %mul3 = fmul fast float %mul, %e10
+ %add4 = fsub fast float %mul, %mul3
+ store float %add4, float* %arrayidx2, align 8
+ %exitcond = icmp eq i64 %indvars.iv.next, 25
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+define void @foo_4s(float* %src) {
+entry:
+ %arrayidx1 = getelementptr inbounds float, float* %src, i64 5
+ %arrayidx2 = getelementptr inbounds float, float* %src, i64 11
+ %tmp = bitcast float* %arrayidx1 to <4 x float>*
+ br label %for.body
+
+; CHECK-LABEL: %for.body
+; CHECK: fmls.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; CHECK: fmls.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx3 = getelementptr inbounds float, float* %src, i64 %indvars.iv.next
+ %tmp1 = load float, float* %arrayidx3, align 8
+ %add = fadd fast float %tmp1, %tmp1
+ %mul = fmul fast float %add, %add
+ %e1 = insertelement <4 x float> undef, float %add, i32 0
+ %e2 = insertelement <4 x float> %e1, float %add, i32 1
+ %add2 = fadd fast <4 x float> %e2, <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
+ %e3 = insertelement <4 x float> undef, float %mul, i32 0
+ %e4 = insertelement <4 x float> %e3, float %mul, i32 1
+ %mul2 = fmul fast <4 x float> %add2,<float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
+ %e5 = insertelement <4 x float> undef, float %add, i32 0
+ %e6 = insertelement <4 x float> %e5, float %add, i32 1
+ %add3 = fsub fast <4 x float> <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00> , %mul2
+ %mulx = fmul fast <4 x float> %add2, %e2
+ %addx = fsub fast <4 x float> %e4, %mulx
+ %e7 = insertelement <4 x float> undef, float %mul, i32 0
+ %e8 = insertelement <4 x float> %e7, float %mul, i32 1
+ %e9 = fmul fast <4 x float> %addx, %add3
+ store <4 x float> %e9, <4 x float>* %tmp, align 8
+ %e10 = extractelement <4 x float> %add3, i32 0
+ %mul3 = fmul fast float %mul, %e10
+ store float %mul3, float* %arrayidx2, align 8
+ %exitcond = icmp eq i64 %indvars.iv.next, 25
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
index 097fe2ca6ed9..bcb196e40456 100644
--- a/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -174,11 +174,11 @@ define i32 @test_br_cc() {
iftrue:
ret i32 42
; CHECK-NEXT: BB#
-; CHECK-NEXT: movz w0, #0x2a
+; CHECK-NEXT: mov w0, #42
; CHECK: ret
iffalse:
ret i32 29
-; CHECK: movz w0, #0x1d
+; CHECK: mov w0, #29
; CHECK: ret
}
diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll
index 895bfe4b3915..caaf8615cd4a 100644
--- a/test/CodeGen/AArch64/arm64-hello.ll
+++ b/test/CodeGen/AArch64/arm64-hello.ll
@@ -1,28 +1,25 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra -disable-fp-elim | FileCheck %s
; RUN: llc < %s -mtriple=arm64-linux-gnu -disable-post-ra | FileCheck %s --check-prefix=CHECK-LINUX
; CHECK-LABEL: main:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
+; CHECK: sub sp, sp, #32
+; CHECK-NEXT: stp x29, x30, [sp, #16]
+; CHECK-NEXT: add x29, sp, #16
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK: adrp x0, L_.str@PAGE
; CHECK: add x0, x0, L_.str@PAGEOFF
; CHECK-NEXT: bl _puts
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ldp x29, x30, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
; CHECK-LINUX-LABEL: main:
-; CHECK-LINUX: stp x29, x30, [sp, #-16]!
-; CHECK-LINUX-NEXT: mov x29, sp
-; CHECK-LINUX-NEXT: sub sp, sp, #16
-; CHECK-LINUX-NEXT: stur wzr, [x29, #-4]
+; CHECK-LINUX: str x30, [sp, #-16]!
+; CHECK-LINUX-NEXT: str wzr, [sp, #12]
; CHECK-LINUX: adrp x0, .L.str
; CHECK-LINUX: add x0, x0, :lo12:.L.str
; CHECK-LINUX-NEXT: bl puts
-; CHECK-LINUX-NEXT: mov sp, x29
-; CHECK-LINUX-NEXT: ldp x29, x30, [sp], #16
+; CHECK-LINUX-NEXT: ldr x30, [sp], #16
; CHECK-LINUX-NEXT: ret
@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
index ac6e8a7731c6..4d4adb10d556 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
; rdar://9167275
@@ -232,3 +232,17 @@ define void @test_zero_reg(i32* %addr) {
ret void
}
+
+define <2 x float> @test_vreg_64bit(<2 x float> %in) nounwind {
+ ; CHECK-LABEL: test_vreg_64bit:
+ %1 = tail call <2 x float> asm sideeffect "fadd ${0}.2s, ${1}.2s, ${1}.2s", "={v14},w"(<2 x float> %in) nounwind
+ ; CHECK: fadd v14.2s, v0.2s, v0.2s
+ ret <2 x float> %1
+}
+
+define <4 x float> @test_vreg_128bit(<4 x float> %in) nounwind {
+ ; CHECK-LABEL: test_vreg_128bit:
+ %1 = tail call <4 x float> asm sideeffect "fadd ${0}.4s, ${1}.4s, ${1}.4s", "={v14},w"(<4 x float> %in) nounwind
+ ; CHECK: fadd v14.4s, v0.4s, v0.4s
+ ret <4 x float> %1
+}
diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll
index c65cf95be2e5..dee034483541 100644
--- a/test/CodeGen/AArch64/arm64-join-reserved.ll
+++ b/test/CodeGen/AArch64/arm64-join-reserved.ll
@@ -5,7 +5,7 @@ target triple = "arm64-apple-macosx10"
; A move isn't necessary.
; <rdar://problem/11492712>
; CHECK-LABEL: g:
-; CHECK: str xzr, [sp, #-16]!
+; CHECK: str xzr, [sp]
; CHECK: bl
; CHECK: ret
define void @g() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/test/CodeGen/AArch64/arm64-ldp-cluster.ll
new file mode 100644
index 000000000000..0cfbe5958f4d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -0,0 +1,150 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOS %s
+
+; Test ldr clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldr_int:BB#0
+; CHECK: Cluster ld/st SU(1) - SU(2)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldr_int:BB#0
+; EXYNOS: Cluster ld/st SU(1) - SU(2)
+; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
+; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
+define i32 @ldr_int(i32* %a) nounwind {
+ %p1 = getelementptr inbounds i32, i32* %a, i32 1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 2
+ %tmp2 = load i32, i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+; Test ldpsw clustering
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldp_sext_int:BB#0
+; CHECK: Cluster ld/st SU(1) - SU(2)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldp_sext_int:BB#0
+; EXYNOS: Cluster ld/st SU(1) - SU(2)
+; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
+; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
+define i64 @ldp_sext_int(i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+; Test ldur clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldur_int:BB#0
+; CHECK: Cluster ld/st SU(2) - SU(1)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldur_int:BB#0
+; EXYNOS: Cluster ld/st SU(2) - SU(1)
+; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
+; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
+define i32 @ldur_int(i32* %a) nounwind {
+ %p1 = getelementptr inbounds i32, i32* %a, i32 -1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 -2
+ %tmp2 = load i32, i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+; Test sext + zext clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldp_half_sext_zext_int:BB#0
+; CHECK: Cluster ld/st SU(3) - SU(4)
+; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
+; CHECK: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0
+; EXYNOS: Cluster ld/st SU(3) - SU(4)
+; EXYNOS: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
+; EXYNOS: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
+define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
+ %tmp0 = load i64, i64* %q, align 4
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = zext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ %add1 = add nsw i64 %add, %tmp0
+ ret i64 %add1
+}
+
+; Test zext + sext clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldp_half_zext_sext_int:BB#0
+; CHECK: Cluster ld/st SU(3) - SU(4)
+; CHECK: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
+; CHECK: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0
+; EXYNOS: Cluster ld/st SU(3) - SU(4)
+; EXYNOS: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
+; EXYNOS: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
+define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
+ %tmp0 = load i64, i64* %q, align 4
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = zext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ %add1 = add nsw i64 %add, %tmp0
+ ret i64 %add1
+}
+
+; Verify we don't cluster volatile loads.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldr_int_volatile:BB#0
+; CHECK-NOT: Cluster ld/st
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldr_int_volatile:BB#0
+; EXYNOS-NOT: Cluster ld/st
+; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
+; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
+define i32 @ldr_int_volatile(i32* %a) nounwind {
+ %p1 = getelementptr inbounds i32, i32* %a, i32 1
+ %tmp1 = load volatile i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 2
+ %tmp2 = load volatile i32, i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+; Test ldq clustering (no clustering for Exynos).
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldq_cluster:BB#0
+; CHECK: Cluster ld/st SU(1) - SU(3)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRQui
+; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRQui
+; EXYNOS: ********** MI Scheduling **********
+; EXYNOS-LABEL: ldq_cluster:BB#0
+; EXYNOS-NOT: Cluster ld/st
+define <2 x i64> @ldq_cluster(i64* %p) {
+ %a1 = bitcast i64* %p to <2 x i64>*
+  %tmp1 = load <2 x i64>, <2 x i64>* %a1, align 8
+ %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 2
+ %a2 = bitcast i64* %add.ptr2 to <2 x i64>*
+ %tmp2 = add nsw <2 x i64> %tmp1, %tmp1
+ %tmp3 = load <2 x i64>, <2 x i64>* %a2, align 8
+ %res = mul nsw <2 x i64> %tmp2, %tmp3
+ ret <2 x i64> %res
+}
diff --git a/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
index 2f91ce252336..23e90100fb94 100644
--- a/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -40,7 +40,7 @@ entry:
define void @t2(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t2:
-; CHECK: movz [[REG3:w[0-9]+]]
+; CHECK: mov [[REG3:w[0-9]+]]
; CHECK: movk [[REG3]],
; CHECK: str [[REG3]], [x0, #32]
; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
@@ -75,9 +75,9 @@ define void @t5(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t5:
; CHECK: strb wzr, [x0, #6]
-; CHECK: movz [[REG7:w[0-9]+]], #0x5453
+; CHECK: mov [[REG7:w[0-9]+]], #21587
; CHECK: strh [[REG7]], [x0, #4]
-; CHECK: movz [[REG8:w[0-9]+]],
+; CHECK: mov [[REG8:w[0-9]+]],
; CHECK: movk [[REG8]],
; CHECK: str [[REG8]], [x0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index 6db21043f670..8b270abef59a 100644
--- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -1,5 +1,6 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - -misched-limit=2 2>&1 > /dev/null | FileCheck %s
;
; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
; much higher than the ADD instructions in order to hide latency. When not
diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
index 770521b75280..292fbb744cea 100644
--- a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
+++ b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
@@ -9,6 +9,9 @@
; CHECK: Successors:
; CHECK-NEXT: val SU(5): Latency=4 Reg=%vreg2
; CHECK-NEXT: ch SU(4): Latency=0
+; CHECK: SU(3): STRWui %WZR, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0
+; CHECK: Successors:
+; CHECK: ch SU(4): Latency=0
; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1
; CHECK: SU(5): %W0<def> = COPY %vreg2; GPR32:%vreg2
; CHECK: ** ScheduleDAGMI::schedule picking next node
diff --git a/test/CodeGen/AArch64/arm64-misched-multimmo.ll b/test/CodeGen/AArch64/arm64-misched-multimmo.ll
new file mode 100644
index 000000000000..d4e8aa1a0a06
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misched-multimmo.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched=0 -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+
+
+@G1 = common global [100 x i32] zeroinitializer, align 4
+@G2 = common global [100 x i32] zeroinitializer, align 4
+
+; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling.
+;
+; CHECK-LABEL: # Machine code for function foo: Properties: <Post SSA
+; CHECK: SU(2): %W{{[0-9]+}}<def>, %W{{[0-9]+}}<def> = LDPWi
+; CHECK: Successors:
+; CHECK-NOT: ch SU(4)
+; CHECK: SU(3)
+; CHECK: SU(4): STRWui %WZR, %X{{[0-9]+}}
+define i32 @foo() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G2, i64 0, i64 0), align 4
+ %1 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G2, i64 0, i64 1), align 4
+ store i32 0, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G1, i64 0, i64 0), align 4
+ %add = add nsw i32 %1, %0
+ ret i32 %add
+}
diff --git a/test/CodeGen/AArch64/arm64-movi.ll b/test/CodeGen/AArch64/arm64-movi.ll
index 2cd368d909dc..344e2224ab43 100644
--- a/test/CodeGen/AArch64/arm64-movi.ll
+++ b/test/CodeGen/AArch64/arm64-movi.ll
@@ -7,21 +7,21 @@
; 64-bit immed with 32-bit pattern size, rotated by 0.
define i64 @test64_32_rot0() nounwind {
; CHECK-LABEL: test64_32_rot0:
-; CHECK: orr x0, xzr, #0x700000007
+; CHECK: mov x0, #30064771079
ret i64 30064771079
}
; 64-bit immed with 32-bit pattern size, rotated by 2.
define i64 @test64_32_rot2() nounwind {
; CHECK-LABEL: test64_32_rot2:
-; CHECK: orr x0, xzr, #0xc0000003c0000003
+; CHECK: mov x0, #-4611686002321260541
ret i64 13835058071388291075
}
; 64-bit immed with 4-bit pattern size, rotated by 3.
define i64 @test64_4_rot3() nounwind {
; CHECK-LABEL: test64_4_rot3:
-; CHECK: orr x0, xzr, #0xeeeeeeeeeeeeeeee
+; CHECK: mov x0, #-1229782938247303442
ret i64 17216961135462248174
}
@@ -35,7 +35,7 @@ define i32 @test32_32_rot16() nounwind {
; 32-bit immed with 2-bit pattern size, rotated by 1.
define i32 @test32_2_rot1() nounwind {
; CHECK-LABEL: test32_2_rot1:
-; CHECK: orr w0, wzr, #0xaaaaaaaa
+; CHECK: mov w0, #-1431655766
ret i32 2863311530
}
@@ -45,30 +45,30 @@ define i32 @test32_2_rot1() nounwind {
define i32 @movz() nounwind {
; CHECK-LABEL: movz:
-; CHECK: movz w0, #0x5
+; CHECK: mov w0, #5
ret i32 5
}
define i64 @movz_3movk() nounwind {
; CHECK-LABEL: movz_3movk:
-; CHECK: movz x0, #0x5, lsl #48
-; CHECK-NEXT: movk x0, #0x1234, lsl #32
-; CHECK-NEXT: movk x0, #0xabcd, lsl #16
-; CHECK-NEXT: movk x0, #0x5678
+; CHECK: mov x0, #1407374883553280
+; CHECK-NEXT: movk x0, #4660, lsl #32
+; CHECK-NEXT: movk x0, #43981, lsl #16
+; CHECK-NEXT: movk x0, #22136
ret i64 1427392313513592
}
define i64 @movz_movk_skip1() nounwind {
; CHECK-LABEL: movz_movk_skip1:
-; CHECK: movz x0, #0x5, lsl #32
-; CHECK-NEXT: movk x0, #0x4321, lsl #16
+; CHECK: mov x0, #21474836480
+; CHECK-NEXT: movk x0, #17185, lsl #16
ret i64 22601072640
}
define i64 @movz_skip1_movk() nounwind {
; CHECK-LABEL: movz_skip1_movk:
-; CHECK: movz x0, #0x8654, lsl #32
-; CHECK-NEXT: movk x0, #0x1234
+; CHECK: mov x0, #147695335374848
+; CHECK-NEXT: movk x0, #4660
ret i64 147695335379508
}
@@ -78,14 +78,14 @@ define i64 @movz_skip1_movk() nounwind {
define i64 @movn() nounwind {
; CHECK-LABEL: movn:
-; CHECK: movn x0, #0x29
+; CHECK: mov x0, #-42
ret i64 -42
}
define i64 @movn_skip1_movk() nounwind {
; CHECK-LABEL: movn_skip1_movk:
-; CHECK: movn x0, #0x29, lsl #32
-; CHECK-NEXT: movk x0, #0x1234
+; CHECK: mov x0, #-176093659137
+; CHECK-NEXT: movk x0, #4660
ret i64 -176093720012
}
@@ -96,107 +96,107 @@ define i64 @movn_skip1_movk() nounwind {
define i64 @orr_movk1() nounwind {
; CHECK-LABEL: orr_movk1:
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #0xdead, lsl #16
+; CHECK: mov x0, #72056494543077120
+; CHECK: movk x0, #57005, lsl #16
ret i64 72056498262245120
}
define i64 @orr_movk2() nounwind {
; CHECK-LABEL: orr_movk2:
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #0xdead, lsl #48
+; CHECK: mov x0, #72056494543077120
+; CHECK: movk x0, #57005, lsl #48
ret i64 -2400982650836746496
}
define i64 @orr_movk3() nounwind {
; CHECK-LABEL: orr_movk3:
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #0xdead, lsl #32
+; CHECK: mov x0, #72056494543077120
+; CHECK: movk x0, #57005, lsl #32
ret i64 72020953688702720
}
define i64 @orr_movk4() nounwind {
; CHECK-LABEL: orr_movk4:
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #0xdead
+; CHECK: mov x0, #72056494543077120
+; CHECK: movk x0, #57005
ret i64 72056494543068845
}
; rdar://14987618
define i64 @orr_movk5() nounwind {
; CHECK-LABEL: orr_movk5:
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #0xdead, lsl #16
+; CHECK: mov x0, #-71777214294589696
+; CHECK: movk x0, #57005, lsl #16
ret i64 -71777214836900096
}
define i64 @orr_movk6() nounwind {
; CHECK-LABEL: orr_movk6:
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #0xdead, lsl #16
-; CHECK: movk x0, #0xdead, lsl #48
+; CHECK: mov x0, #-71777214294589696
+; CHECK: movk x0, #57005, lsl #16
+; CHECK: movk x0, #57005, lsl #48
ret i64 -2400982647117578496
}
define i64 @orr_movk7() nounwind {
; CHECK-LABEL: orr_movk7:
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #0xdead, lsl #48
+; CHECK: mov x0, #-71777214294589696
+; CHECK: movk x0, #57005, lsl #48
ret i64 -2400982646575268096
}
define i64 @orr_movk8() nounwind {
; CHECK-LABEL: orr_movk8:
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #0xdead
-; CHECK: movk x0, #0xdead, lsl #48
+; CHECK: mov x0, #-71777214294589696
+; CHECK: movk x0, #57005
+; CHECK: movk x0, #57005, lsl #48
ret i64 -2400982646575276371
}
; rdar://14987715
define i64 @orr_movk9() nounwind {
; CHECK-LABEL: orr_movk9:
-; CHECK: orr x0, xzr, #0xffffff000000000
-; CHECK: movk x0, #0xff00
-; CHECK: movk x0, #0xdead, lsl #16
+; CHECK: mov x0, #1152921435887370240
+; CHECK: movk x0, #65280
+; CHECK: movk x0, #57005, lsl #16
ret i64 1152921439623315200
}
define i64 @orr_movk10() nounwind {
; CHECK-LABEL: orr_movk10:
-; CHECK: orr x0, xzr, #0xfffffffffffff00
-; CHECK: movk x0, #0xdead, lsl #16
+; CHECK: mov x0, #1152921504606846720
+; CHECK: movk x0, #57005, lsl #16
ret i64 1152921504047824640
}
define i64 @orr_movk11() nounwind {
; CHECK-LABEL: orr_movk11:
-; CHECK: orr x0, xzr, #0xfff00000000000ff
-; CHECK: movk x0, #0xdead, lsl #16
-; CHECK: movk x0, #0xffff, lsl #32
+; CHECK: mov x0, #-4503599627370241
+; CHECK: movk x0, #57005, lsl #16
+; CHECK: movk x0, #65535, lsl #32
ret i64 -4222125209747201
}
define i64 @orr_movk12() nounwind {
; CHECK-LABEL: orr_movk12:
-; CHECK: orr x0, xzr, #0xfff00000000000ff
-; CHECK: movk x0, #0xdead, lsl #32
+; CHECK: mov x0, #-4503599627370241
+; CHECK: movk x0, #57005, lsl #32
ret i64 -4258765016661761
}
define i64 @orr_movk13() nounwind {
; CHECK-LABEL: orr_movk13:
-; CHECK: orr x0, xzr, #0xfffff000000
-; CHECK: movk x0, #0xdead
-; CHECK: movk x0, #0xdead, lsl #48
+; CHECK: mov x0, #17592169267200
+; CHECK: movk x0, #57005
+; CHECK: movk x0, #57005, lsl #48
ret i64 -2401245434149282131
}
; rdar://13944082
define i64 @g() nounwind {
; CHECK-LABEL: g:
-; CHECK: movz x0, #0xffff, lsl #48
-; CHECK: movk x0, #0x2
+; CHECK: mov x0, #-281474976710656
+; CHECK: movk x0, #2
entry:
ret i64 -281474976710654
}
diff --git a/test/CodeGen/AArch64/arm64-mul.ll b/test/CodeGen/AArch64/arm64-mul.ll
index 2e7986d67d9e..a424dc761bc8 100644
--- a/test/CodeGen/AArch64/arm64-mul.ll
+++ b/test/CodeGen/AArch64/arm64-mul.ll
@@ -88,3 +88,65 @@ entry:
%tmp4 = sub i64 0, %tmp3
ret i64 %tmp4
}
+
+define i64 @t9(i32 %a) nounwind {
+entry:
+; CHECK-LABEL: t9:
+; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp1 = zext i32 %a to i64
+ %tmp2 = mul i64 %tmp1, 139968
+ ret i64 %tmp2
+}
+
+; Check 64-bit multiplication is used for constants > 32 bits.
+define i64 @t10(i32 %a) nounwind {
+entry:
+; CHECK-LABEL: t10:
+; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp1 = sext i32 %a to i64
+ %tmp2 = mul i64 %tmp1, 2147483650 ; = 2^31 + 2
+ ret i64 %tmp2
+}
+
+; Check the sext_inreg case.
+define i64 @t11(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp1 = trunc i64 %a to i32
+ %tmp2 = sext i32 %tmp1 to i64
+ %tmp3 = mul i64 %tmp2, -2395238
+ %tmp4 = sub i64 0, %tmp3
+ ret i64 %tmp4
+}
+
+define i64 @t12(i64 %a, i64 %b) nounwind {
+entry:
+; CHECK-LABEL: t12:
+; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ %tmp1 = trunc i64 %a to i32
+ %tmp2 = sext i32 %tmp1 to i64
+ %tmp3 = mul i64 %tmp2, -34567890
+ %tmp4 = add i64 %b, %tmp3
+ ret i64 %tmp4
+}
+
+define i64 @t13(i32 %a, i64 %b) nounwind {
+entry:
+; CHECK-LABEL: t13:
+; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ %tmp1 = zext i32 %a to i64
+ %tmp3 = mul i64 %tmp1, 12345678
+ %tmp4 = sub i64 %b, %tmp3
+ ret i64 %tmp4
+}
+
+define i64 @t14(i32 %a, i64 %b) nounwind {
+entry:
+; CHECK-LABEL: t14:
+; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ %tmp1 = sext i32 %a to i64
+ %tmp3 = mul i64 %tmp1, -12345678
+ %tmp4 = sub i64 %b, %tmp3
+ ret i64 %tmp4
+}
diff --git a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
index 5276ac334a71..be5b7e9b2966 100644
--- a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
+++ b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
-; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE
+; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=LE
+; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=BE
+; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=kryo -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=LE
; CHECK-LABEL: Ldrh_merge
; CHECK-NOT: ldrh
@@ -352,6 +353,56 @@ entry:
ret void
}
+; CHECK-LABEL: Strw_zero
+; CHECK: str xzr
+define void @Strw_zero(i32* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+ store i32 0, i32* %arrayidx2
+ ret void
+}
+
+; CHECK-LABEL: Strw_zero_nonzero
+; CHECK: stp wzr, w1
+define void @Strw_zero_nonzero(i32* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+ store i32 %n, i32* %arrayidx2
+ ret void
+}
+
+; CHECK-LABEL: Strw_zero_4
+; CHECK: stp xzr
+define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+ store i32 0, i32* %arrayidx2
+ %add3 = add nsw i32 %n, 2
+ %idxprom4 = sext i32 %add3 to i64
+ %arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
+ store i32 0, i32* %arrayidx5
+ %add6 = add nsw i32 %n, 3
+ %idxprom7 = sext i32 %add6 to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
+ store i32 0, i32* %arrayidx8
+ ret void
+}
+
; CHECK-LABEL: Sturb_zero
; CHECK: sturh wzr
define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
@@ -404,3 +455,42 @@ entry:
store i16 0, i16* %arrayidx9
ret void
}
+
+; CHECK-LABEL: Sturw_zero
+; CHECK: stur xzr
+define void @Sturw_zero(i32* nocapture %P, i32 %n) {
+entry:
+ %sub = add nsw i32 %n, -3
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %sub1 = add nsw i32 %n, -4
+ %idxprom2 = sext i32 %sub1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+ store i32 0, i32* %arrayidx3
+ ret void
+}
+
+; CHECK-LABEL: Sturw_zero_4
+; CHECK: stp xzr, xzr
+define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+ %sub = add nsw i32 %n, -3
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ store i32 0, i32* %arrayidx
+ %sub1 = add nsw i32 %n, -4
+ %idxprom2 = sext i32 %sub1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+ store i32 0, i32* %arrayidx3
+ %sub4 = add nsw i32 %n, -2
+ %idxprom5 = sext i32 %sub4 to i64
+ %arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
+ store i32 0, i32* %arrayidx6
+ %sub7 = add nsw i32 %n, -1
+ %idxprom8 = sext i32 %sub7 to i64
+ %arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
+ store i32 0, i32* %arrayidx9
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
index 3ff1e61d0298..575acf723753 100644
--- a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
@@ -18,7 +18,7 @@ entry:
define <4 x i32> @test_vmull_high_n_s16_imm(<8 x i16> %a) #0 {
; CHECK-LABEL: test_vmull_high_n_s16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -42,7 +42,7 @@ entry:
define <2 x i64> @test_vmull_high_n_s32_imm(<4 x i32> %a) #0 {
; CHECK-LABEL: test_vmull_high_n_s32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #1, msl #8
; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -68,7 +68,7 @@ entry:
define <4 x i32> @test_vmull_high_n_u16_imm(<8 x i16> %a) #0 {
; CHECK-LABEL: test_vmull_high_n_u16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #17, lsl #8
; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -92,7 +92,7 @@ entry:
define <2 x i64> @test_vmull_high_n_u32_imm(<4 x i32> %a) #0 {
; CHECK-LABEL: test_vmull_high_n_u32_imm:
-; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8
+; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #1, msl #8
; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -118,7 +118,7 @@ entry:
define <4 x i32> @test_vqdmull_high_n_s16_imm(<8 x i16> %a) #0 {
; CHECK-LABEL: test_vqdmull_high_n_s16_imm:
-; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8
+; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #17, lsl #8
; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -142,7 +142,7 @@ entry:
define <2 x i64> @test_vqdmull_high_n_s32_imm(<4 x i32> %a) #0 {
; CHECK-LABEL: test_vqdmull_high_n_s32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -169,7 +169,7 @@ entry:
define <4 x i32> @test_vmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_vmlal_high_n_s16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -195,7 +195,7 @@ entry:
define <2 x i64> @test_vmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_vmlal_high_n_s32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -223,7 +223,7 @@ entry:
define <4 x i32> @test_vmlal_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_vmlal_high_n_u16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -249,7 +249,7 @@ entry:
define <2 x i64> @test_vmlal_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_vmlal_high_n_u32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -277,7 +277,7 @@ entry:
define <4 x i32> @test_vqdmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_vqdmlal_high_n_s16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -303,7 +303,7 @@ entry:
define <2 x i64> @test_vqdmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_vqdmlal_high_n_s32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -331,7 +331,7 @@ entry:
define <4 x i32> @test_vmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_vmlsl_high_n_s16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -357,7 +357,7 @@ entry:
define <2 x i64> @test_vmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_vmlsl_high_n_s32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -385,7 +385,7 @@ entry:
define <4 x i32> @test_vmlsl_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_vmlsl_high_n_u16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -411,7 +411,7 @@ entry:
define <2 x i64> @test_vmlsl_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_vmlsl_high_n_u32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
@@ -439,7 +439,7 @@ entry:
define <4 x i32> @test_vqdmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
; CHECK-NEXT: ret
entry:
@@ -465,7 +465,7 @@ entry:
define <2 x i64> @test_vqdmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm:
-; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d
+; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
; CHECK-NEXT: ret
entry:
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index 83b1cac70f5c..e91a1a42c233 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -902,6 +902,43 @@ define <8 x i8> @getl(<16 x i8> %x) #0 {
ret <8 x i8> %vecinit14
}
+; CHECK-LABEL: test_extracts_inserts_varidx_extract:
+; CHECK: str q0
+; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
+; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
+; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
+; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
+; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
+define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
+ %tmp = extractelement <8 x i16> %x, i32 %idx
+ %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
+ %tmp3 = extractelement <8 x i16> %x, i32 1
+ %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
+ %tmp5 = extractelement <8 x i16> %x, i32 2
+ %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
+ %tmp7 = extractelement <8 x i16> %x, i32 3
+ %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
+ ret <4 x i16> %tmp8
+}
+
+; CHECK-LABEL: test_extracts_inserts_varidx_insert:
+; CHECK: str h0, [{{.*}}, w0, sxtw #1]
+; CHECK-DAG: ldr d[[R:[0-9]+]]
+; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
+; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
+; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
+define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
+ %tmp = extractelement <8 x i16> %x, i32 0
+ %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
+ %tmp3 = extractelement <8 x i16> %x, i32 1
+ %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
+ %tmp5 = extractelement <8 x i16> %x, i32 2
+ %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
+ %tmp7 = extractelement <8 x i16> %x, i32 3
+ %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
+ ret <4 x i16> %tmp8
+}
+
define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
@@ -1368,7 +1405,7 @@ define <4 x i16> @concat_vector_v4i16_const() {
define <4 x i16> @concat_vector_v4i16_const_one() {
; CHECK-LABEL: concat_vector_v4i16_const_one:
-; CHECK: movi {{v[0-9]+}}.4h, #0x1
+; CHECK: movi {{v[0-9]+}}.4h, #1
%r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r
}
@@ -1396,7 +1433,7 @@ define <8 x i16> @concat_vector_v8i16_const() {
define <8 x i16> @concat_vector_v8i16_const_one() {
; CHECK-LABEL: concat_vector_v8i16_const_one:
-; CHECK: movi {{v[0-9]+}}.8h, #0x1
+; CHECK: movi {{v[0-9]+}}.8h, #1
%r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %r
}
diff --git a/test/CodeGen/AArch64/arm64-nvcast.ll b/test/CodeGen/AArch64/arm64-nvcast.ll
index 3cb1bf25fc34..c3a1640ab012 100644
--- a/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: _test:
; CHECK: fmov.2d v0, #2.00000000
-; CHECK: str q0, [sp]
+; CHECK: str q0, [sp, #-16]!
; CHECK: mov x8, sp
; CHECK: ldr s0, [x8, w1, sxtw #2]
; CHECK: str s0, [x0]
@@ -15,8 +15,8 @@ entry:
}
; CHECK-LABEL: _test2
-; CHECK: movi.16b v0, #0x3f
-; CHECK: str q0, [sp]
+; CHECK: movi.16b v0, #63
+; CHECK: str q0, [sp, #-16]!
; CHECK: mov x8, sp
; CHECK: ldr s0, [x8, w1, sxtw #2]
; CHECK: str s0, [x0]
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
index c2006ccdd064..caf4498276ce 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
@@ -1,5 +1,12 @@
-; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone < %s | FileCheck %s
; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel < %s | FileCheck %s --check-prefix=FAST
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -filetype=obj -o %t %s
+; RUN: llvm-objdump -triple arm64-apple-darwin -d %t | FileCheck %s --check-prefix CHECK-ENCODING
+
+; CHECK-ENCODING-NOT: <unknown>
+; CHECK-ENCODING: mov x16, #281470681743360
+; CHECK-ENCODING: movk x16, #57005, lsl #16
+; CHECK-ENCODING: movk x16, #48879
; One argument will be passed in register, the other will be pushed on the stack.
; Return value in x0.
@@ -7,20 +14,20 @@ define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: jscall_patchpoint_codegen:
; CHECK: Ltmp
-; CHECK: str x{{.+}}, [sp, #-16]!
+; CHECK: str x{{.+}}, [sp]
; CHECK-NEXT: mov x0, x{{.+}}
; CHECK: Ltmp
-; CHECK-NEXT: movz x16, #0xffff, lsl #32
-; CHECK-NEXT: movk x16, #0xdead, lsl #16
-; CHECK-NEXT: movk x16, #0xbeef
+; CHECK-NEXT: mov x16, #281470681743360
+; CHECK: movk x16, #57005, lsl #16
+; CHECK: movk x16, #48879
; CHECK-NEXT: blr x16
; FAST-LABEL: jscall_patchpoint_codegen:
; FAST: Ltmp
-; FAST: str x{{.+}}, [sp, #-16]!
+; FAST: str x{{.+}}, [sp]
; FAST: Ltmp
-; FAST-NEXT: movz x16, #0xffff, lsl #32
-; FAST-NEXT: movk x16, #0xdead, lsl #16
-; FAST-NEXT: movk x16, #0xbeef
+; FAST-NEXT: mov x16, #281470681743360
+; FAST-NEXT: movk x16, #57005, lsl #16
+; FAST-NEXT: movk x16, #48879
; FAST-NEXT: blr x16
%resolveCall2 = inttoptr i64 281474417671919 to i8*
%result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
@@ -41,22 +48,22 @@ entry:
; CHECK-NEXT: orr w[[REG:[0-9]+]], wzr, #0x2
; CHECK-NEXT: str x[[REG]], [sp]
; CHECK: Ltmp
-; CHECK-NEXT: movz x16, #0xffff, lsl #32
-; CHECK-NEXT: movk x16, #0xdead, lsl #16
-; CHECK-NEXT: movk x16, #0xbeef
+; CHECK-NEXT: mov x16, #281470681743360
+; CHECK-NEXT: movk x16, #57005, lsl #16
+; CHECK-NEXT: movk x16, #48879
; CHECK-NEXT: blr x16
; FAST-LABEL: jscall_patchpoint_codegen2:
; FAST: Ltmp
; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2
; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4
; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6
-; FAST-NEXT: str [[REG1]], [sp, #-32]!
+; FAST-NEXT: str [[REG1]], [sp]
; FAST-NEXT: str [[REG2]], [sp, #16]
; FAST-NEXT: str [[REG3]], [sp, #24]
; FAST: Ltmp
-; FAST-NEXT: movz x16, #0xffff, lsl #32
-; FAST-NEXT: movk x16, #0xdead, lsl #16
-; FAST-NEXT: movk x16, #0xbeef
+; FAST-NEXT: mov x16, #281470681743360
+; FAST-NEXT: movk x16, #57005, lsl #16
+; FAST-NEXT: movk x16, #48879
; FAST-NEXT: blr x16
%call = inttoptr i64 281474417671919 to i8*
%result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
@@ -68,7 +75,7 @@ define i64 @jscall_patchpoint_codegen3(i64 %callee) {
entry:
; CHECK-LABEL: jscall_patchpoint_codegen3:
; CHECK: Ltmp
-; CHECK: movz w[[REG:[0-9]+]], #0xa
+; CHECK: mov w[[REG:[0-9]+]], #10
; CHECK-NEXT: str x[[REG]], [sp, #48]
; CHECK-NEXT: orr w[[REG:[0-9]+]], wzr, #0x8
; CHECK-NEXT: str w[[REG]], [sp, #36]
@@ -79,9 +86,9 @@ entry:
; CHECK-NEXT: orr w[[REG:[0-9]+]], wzr, #0x2
; CHECK-NEXT: str x[[REG]], [sp]
; CHECK: Ltmp
-; CHECK-NEXT: movz x16, #0xffff, lsl #32
-; CHECK-NEXT: movk x16, #0xdead, lsl #16
-; CHECK-NEXT: movk x16, #0xbeef
+; CHECK-NEXT: mov x16, #281470681743360
+; CHECK-NEXT: movk x16, #57005, lsl #16
+; CHECK-NEXT: movk x16, #48879
; CHECK-NEXT: blr x16
; FAST-LABEL: jscall_patchpoint_codegen3:
; FAST: Ltmp
@@ -89,16 +96,16 @@ entry:
; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4
; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6
; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8
-; FAST-NEXT: movz [[REG5:x[0-9]+]], #0xa
-; FAST-NEXT: str [[REG1]], [sp, #-64]!
+; FAST-NEXT: mov [[REG5:x[0-9]+]], #10
+; FAST-NEXT: str [[REG1]], [sp]
; FAST-NEXT: str [[REG2]], [sp, #16]
; FAST-NEXT: str [[REG3]], [sp, #24]
; FAST-NEXT: str [[REG4]], [sp, #36]
; FAST-NEXT: str [[REG5]], [sp, #48]
; FAST: Ltmp
-; FAST-NEXT: movz x16, #0xffff, lsl #32
-; FAST-NEXT: movk x16, #0xdead, lsl #16
-; FAST-NEXT: movk x16, #0xbeef
+; FAST-NEXT: mov x16, #281470681743360
+; FAST-NEXT: movk x16, #57005, lsl #16
+; FAST-NEXT: movk x16, #48879
; FAST-NEXT: blr x16
%call = inttoptr i64 281474417671919 to i8*
%result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
@@ -115,4 +122,3 @@ define webkit_jscc zeroext i16 @test_i16(i16 zeroext %a, i16 zeroext %b) {
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
-
diff --git a/test/CodeGen/AArch64/arm64-patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll
index d9ec7e50ff80..2f9004bb22e6 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -6,13 +6,13 @@
define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: trivial_patchpoint_codegen:
-; CHECK: movz x16, #0xdead, lsl #32
-; CHECK-NEXT: movk x16, #0xbeef, lsl #16
-; CHECK-NEXT: movk x16, #0xcafe
+; CHECK: mov x16, #244834610708480
+; CHECK-NEXT: movk x16, #48879, lsl #16
+; CHECK-NEXT: movk x16, #51966
; CHECK-NEXT: blr x16
-; CHECK: movz x16, #0xdead, lsl #32
-; CHECK-NEXT: movk x16, #0xbeef, lsl #16
-; CHECK-NEXT: movk x16, #0xcaff
+; CHECK: mov x16, #244834610708480
+; CHECK-NEXT: movk x16, #48879, lsl #16
+; CHECK-NEXT: movk x16, #51967
; CHECK-NEXT: blr x16
; CHECK: ret
%resolveCall2 = inttoptr i64 244837814094590 to i8*
@@ -26,10 +26,11 @@ entry:
; as a leaf function.
;
; CHECK-LABEL: caller_meta_leaf
-; CHECK: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #32
+; CHECK: sub sp, sp, #48
+; CHECK-NEXT: stp x29, x30, [sp, #32]
+; CHECK-NEXT: add x29, sp, #32
; CHECK: Ltmp
-; CHECK: mov sp, x29
+; CHECK: add sp, sp, #48
; CHECK: ret
define void @caller_meta_leaf() {
diff --git a/test/CodeGen/AArch64/arm64-register-pairing.ll b/test/CodeGen/AArch64/arm64-register-pairing.ll
index 99defb1aad7c..eac7e5cb3363 100644
--- a/test/CodeGen/AArch64/arm64-register-pairing.ll
+++ b/test/CodeGen/AArch64/arm64-register-pairing.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s
;
; rdar://14075006
@@ -13,7 +14,7 @@ define void @odd() nounwind {
; CHECK: stp x24, x23, [sp, #96]
; CHECK: stp x22, x21, [sp, #112]
; CHECK: stp x20, x19, [sp, #128]
-; CHECK: movz x0, #0x2a
+; CHECK: mov x0, #42
; CHECK: ldp x20, x19, [sp, #128]
; CHECK: ldp x22, x21, [sp, #112]
; CHECK: ldp x24, x23, [sp, #96]
@@ -23,6 +24,19 @@ define void @odd() nounwind {
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: odd:
+; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]!
+; CHECK-NOTMACHO: stp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: str x27, [sp, #32]
+; CHECK-NOTMACHO: stp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: stp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: mov x0, #42
+; CHECK-NOTMACHO: ldp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: ldp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: ldr x27, [sp, #32]
+; CHECK-NOTMACHO: ldp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: ldp d14, d12, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
ret void
}
@@ -38,7 +52,7 @@ define void @even() nounwind {
; CHECK: stp x24, x23, [sp, #96]
; CHECK: stp x22, x21, [sp, #112]
; CHECK: stp x20, x19, [sp, #128]
-; CHECK: movz x0, #0x2a
+; CHECK: mov x0, #42
; CHECK: ldp x20, x19, [sp, #128]
; CHECK: ldp x22, x21, [sp, #112]
; CHECK: ldp x24, x23, [sp, #96]
@@ -48,6 +62,19 @@ define void @even() nounwind {
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: even:
+; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]!
+; CHECK-NOTMACHO: stp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: str x28, [sp, #32]
+; CHECK-NOTMACHO: stp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: stp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: mov x0, #42
+; CHECK-NOTMACHO: ldp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: ldp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: ldr x28, [sp, #32]
+; CHECK-NOTMACHO: ldp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: ldp d15, d13, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
new file mode 100644
index 000000000000..3948c0457bcd
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
@@ -0,0 +1,42 @@
+# RUN: rm -f %S/arm64-regress-opt-cmp.s
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s 2>&1 | FileCheck %s
+# CHECK: %1 = ANDWri {{.*}}
+# CHECK-NEXT: %wzr = SUBSWri {{.*}}
+--- |
+ define i32 @test01() nounwind {
+ entry:
+ %0 = select i1 true, i32 1, i32 0
+ %1 = and i32 %0, 65535
+ %2 = icmp ugt i32 %1, 0
+ br i1 %2, label %if.then, label %if.end
+
+ if.then: ; preds = %entry
+ ret i32 1
+
+ if.end: ; preds = %entry
+ ret i32 0
+ }
+...
+---
+name: test01
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32common }
+body: |
+ bb.0.entry:
+ successors: %bb.2.if.end, %bb.1.if.then
+
+ %0 = MOVi32imm 1
+ %1 = ANDWri killed %1, 15
+ %wzr = SUBSWri killed %1, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.2.if.end, implicit %nzcv
+
+ bb.1.if.then:
+ %w0 = MOVi32imm 1
+ RET_ReallyLR implicit %w0
+
+ bb.2.if.end:
+ %w0 = MOVi32imm 0
+ RET_ReallyLR implicit %w0
+
+...
diff --git a/test/CodeGen/AArch64/arm64-rev.ll b/test/CodeGen/AArch64/arm64-rev.ll
index 74356d76d3c8..4980d7e3b275 100644
--- a/test/CodeGen/AArch64/arm64-rev.ll
+++ b/test/CodeGen/AArch64/arm64-rev.ll
@@ -16,6 +16,33 @@ entry:
ret i64 %0
}
+; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16-bits
+; of %a are zero. This optimizes rev + lsr 16 to rev16.
+define i32 @test_rev_w_srl16(i16 %a) {
+entry:
+; CHECK-LABEL: test_rev_w_srl16:
+; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
+; CHECK: rev16 w0, [[REG]]
+; CHECK-NOT: lsr
+ %0 = zext i16 %a to i32
+ %1 = tail call i32 @llvm.bswap.i32(i32 %0)
+ %2 = lshr i32 %1, 16
+ ret i32 %2
+}
+
+; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32-bits
+; of %a are zero. This optimizes rev + lsr 32 to rev32.
+define i64 @test_rev_x_srl32(i32 %a) {
+entry:
+; CHECK-LABEL: test_rev_x_srl32:
+; CHECK: rev32 x0, {{x[0-9]+}}
+; CHECK-NOT: lsr
+ %0 = zext i32 %a to i64
+ %1 = tail call i64 @llvm.bswap.i64(i64 %0)
+ %2 = lshr i64 %1, 32
+ ret i64 %2
+}
+
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 4d751f501d4a..16ae7ef8e1b7 100644
--- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -1,5 +1,5 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
-; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"
@@ -13,9 +13,9 @@ target triple = "arm64-apple-ios"
; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
-; CHECK: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #-16]!
-; CHECK-NEXT: mov [[SAVE_SP]], sp
-; CHECK-NEXT: sub sp, sp, #16
+; CHECK: sub sp, sp, #32
+; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16]
+; CHECK-NEXT: add [[SAVE_SP]], sp, #16
;
; Compare the arguments and jump to exit.
; After the prologue is set.
@@ -29,12 +29,12 @@ target triple = "arm64-apple-ios"
; Set the first argument to zero.
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: bl _doSomething
-;
+;
; Without shrink-wrapping, epilogue is in the exit block.
; DISABLE: [[EXIT_LABEL]]:
; Epilogue code.
-; CHECK-NEXT: mov sp, [[SAVE_SP]]
-; CHECK-NEXT: ldp [[SAVE_SP]], [[CSR]], [sp], #16
+; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
;
; With shrink-wrapping, exit block is a simple return.
; ENABLE: [[EXIT_LABEL]]:
@@ -73,7 +73,7 @@ declare i32 @doSomething(i32, i32*)
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
;
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body
@@ -140,7 +140,7 @@ declare i32 @something(...)
; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
; CHECK: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
@@ -184,7 +184,7 @@ for.end: ; preds = %for.body
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
@@ -255,7 +255,7 @@ declare void @somethingElse(...)
;
; CHECK: bl _somethingElse
; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: bl _something
@@ -332,11 +332,11 @@ entry:
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Sum is merged with the returned register.
-; CHECK: mov [[SUM:w0]], wzr
-; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
+; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
; CHECK-NEXT: cmp w1, #1
; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
+; CHECK: mov [[SUM:w0]], wzr
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
@@ -347,18 +347,18 @@ entry:
; CHECK-NEXT: sub w1, w1, #1
; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
;
-; DISABLE-NEXT: b [[IFEND_LABEL]]
+; DISABLE-NEXT: b
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; DISABLE: lsl w0, w1, #1
;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE: lsl w0, w1, #1
+; ENABLE-NEXT: ret
+;
; CHECK: [[IFEND_LABEL]]:
; Epilogue code.
; CHECK: add sp, sp, #16
; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; ENABLE: lsl w0, w1, #1
-; ENABLE-NEXT: ret
define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
entry:
%ap = alloca i8*, align 8
@@ -409,7 +409,7 @@ declare void @llvm.va_end(i8*)
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
-; CHECK: movz [[IV:w[0-9]+]], #0xa
+; CHECK: mov [[IV:w[0-9]+]], #10
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; Inline asm statement.
@@ -454,9 +454,9 @@ if.end: ; preds = %for.body, %if.else
; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
-; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-16]!
-; CHECK-NEXT: mov [[NEW_SP:x[0-9]+]], sp
-; CHECK-NEXT: sub sp, sp, #48
+; CHECK: sub sp, sp, #64
+; CHECK-NEXT: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #48]
+; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #48
;
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
; Setup of the varags.
@@ -473,8 +473,8 @@ if.end: ; preds = %for.body, %if.else
; DISABLE: [[IFEND_LABEL]]: ; %if.end
;
; Epilogue code.
-; CHECK: mov sp, [[NEW_SP]]
-; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
+; CHECK: ldp [[CSR1]], [[CSR2]], [sp, #48]
+; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
;
; ENABLE: [[ELSE_LABEL]]: ; %if.else
@@ -511,7 +511,7 @@ declare i32 @someVariadicFunc(i32, ...)
; CHECK: and [[TEST:w[0-9]+]], w0, #0xff
; CHECK-NEXT: cbnz [[TEST]], [[ABORT:LBB[0-9_]+]]
;
-; CHECK: movz w0, #0x2a
+; CHECK: mov w0, #42
;
; DISABLE-NEXT: ldp
;
@@ -631,16 +631,20 @@ end:
ret void
}
-; Don't do shrink-wrapping when we need to re-align the stack pointer.
-; See bug 26642.
+; Re-aligned stack pointer. See bug 26642. Avoid clobbering live
+; values in the prologue when re-aligning the stack pointer.
; CHECK-LABEL: stack_realign:
-; CHECK-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
-; CHECK-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
+; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
+; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
+; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
+; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
-; CHECK: sub x{{[0-9]+}}, sp, #16
-; CHECK-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
-; CHECK-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
+; ENABLE-NOT: sub x[[LSL1]], sp, #16
+; ENABLE-NOT: sub x[[LSL2]], sp, #16
+; DISABLE: sub x{{[0-9]+}}, sp, #16
+; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
+; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
; CHECK-DAG: str w[[LSL1]],
; CHECK-DAG: str w[[LSL2]],
diff --git a/test/CodeGen/AArch64/arm64-stp-aa.ll b/test/CodeGen/AArch64/arm64-stp-aa.ll
index 82d343d976b5..2a45745fedb5 100644
--- a/test/CodeGen/AArch64/arm64-stp-aa.ll
+++ b/test/CodeGen/AArch64/arm64-stp-aa.ll
@@ -109,3 +109,37 @@ define double @stp_double_aa_after(double %d0, double %a, double %b, double* noc
store double %b, double* %add.ptr, align 8
ret double %tmp
}
+
+; Check that the stores %c and %d are paired after the fadd instruction,
+; and then the stores %a and %d are paired after proving that they do not
+; depend on the the (%c, %d) pair.
+;
+; CHECK-LABEL: st1:
+; CHECK: stp q0, q1, [x{{[0-9]+}}]
+; CHECK: fadd
+; CHECK: stp q2, q0, [x{{[0-9]+}}, #32]
+define void @st1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %base, i64 %index) {
+entry:
+ %a0 = getelementptr inbounds float, float* %base, i64 %index
+ %b0 = getelementptr float, float* %a0, i64 4
+ %c0 = getelementptr float, float* %a0, i64 8
+ %d0 = getelementptr float, float* %a0, i64 12
+
+ %a1 = bitcast float* %a0 to <4 x float>*
+ %b1 = bitcast float* %b0 to <4 x float>*
+ %c1 = bitcast float* %c0 to <4 x float>*
+ %d1 = bitcast float* %d0 to <4 x float>*
+
+ store <4 x float> %c, <4 x float> * %c1, align 4
+ store <4 x float> %a, <4 x float> * %a1, align 4
+
+ ; This fadd forces the compiler to pair %c and %e after fadd, and leave the
+ ; stores %a and %b separated by a stp. The dependence analysis needs then to
+ ; prove that it is safe to move %b past the stp to be paired with %a.
+ %e = fadd fast <4 x float> %d, %a
+
+ store <4 x float> %e, <4 x float>* %d1, align 4
+ store <4 x float> %b, <4 x float>* %b1, align 4
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
index 98242d0bb57e..5664c7d118c3 100644
--- a/test/CodeGen/AArch64/arm64-stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -100,9 +100,9 @@ entry:
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
; CHECK-LABEL: stp_int_rar_hazard
-; CHECK: stp w0, w1, [x2]
; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
-; CHECK: add w0, [[REG]], w1
+; CHECK: add w8, [[REG]], w1
+; CHECK: stp w0, w1, [x2]
; CHECK: ret
define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
store i32 %a, i32* %p, align 4
diff --git a/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll b/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
index 4ab2bee0ed16..2eedde557644 100644
--- a/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
+++ b/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
@@ -4,7 +4,7 @@
; getting both the endianness wrong and the element indexing wrong.
define <8 x i16> @foo(<8 x i16> %a) nounwind readnone {
; CHECK: .section __TEXT,__literal16,16byte_literals
-; CHECK: .align 4
+; CHECK: .p2align 4
; CHECK:lCPI0_0:
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 1 ; 0x1
@@ -24,7 +24,7 @@ define <8 x i16> @foo(<8 x i16> %a) nounwind readnone {
; CHECK: .byte 9 ; 0x9
; CHECK: .section __TEXT,__text,regular,pure_instructions
; CHECK: .globl _foo
-; CHECK: .align 2
+; CHECK: .p2align 2
; CHECK:_foo: ; @foo
; CHECK: adrp [[BASE:x[0-9]+]], lCPI0_0@PAGE
; CHECK: ldr q[[REG:[0-9]+]], {{\[}}[[BASE]], lCPI0_0@PAGEOFF]
diff --git a/test/CodeGen/AArch64/arm64-this-return.ll b/test/CodeGen/AArch64/arm64-this-return.ll
index 3be1a69237d7..9fc68f476b77 100644
--- a/test/CodeGen/AArch64/arm64-this-return.ll
+++ b/test/CodeGen/AArch64/arm64-this-return.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-this-return-forwarding | FileCheck %s
%struct.A = type { i8 }
%struct.B = type { i32 }
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
index c95eca062ff6..bb9ad46ba63d 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
@@ -40,7 +40,7 @@ define i32 @test_emulated_init() {
; EMU-NOT: __emutls_v.general_dynamic_var:
-; EMU: .align 3
+; EMU: .p2align 3
; EMU-LABEL: __emutls_v.emulated_init_var:
; EMU-NEXT: .xword 4
; EMU-NEXT: .xword 8
diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 8702b41023d0..16ddf690fe95 100644
--- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -32,7 +32,7 @@ define void @test_simple(i32 %n, ...) {
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-; CHECK: movn [[GR_OFFS:w[0-9]+]], #0x37
+; CHECK: mov [[GR_OFFS:w[0-9]+]], #-56
; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80
@@ -70,10 +70,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-; CHECK: movn [[GR_OFFS:w[0-9]+]], #0x27
+; CHECK: mov [[GR_OFFS:w[0-9]+]], #-40
; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-; CHECK: movn [[VR_OFFS:w[0-9]+]], #0x6f
+; CHECK: mov [[VR_OFFS:w[0-9]+]], #-11
; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
%addr = bitcast %va_list* @var to i8*
diff --git a/test/CodeGen/AArch64/arm64-vclz.ll b/test/CodeGen/AArch64/arm64-vclz.ll
index cf5670a0354f..10118f0d5638 100644
--- a/test/CodeGen/AArch64/arm64-vclz.ll
+++ b/test/CodeGen/AArch64/arm64-vclz.ll
@@ -48,6 +48,18 @@ define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
ret <2 x i32> %vclz1.i
}
+define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_u64:
+ %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
+ ret <1 x i64> %vclz1.i
+}
+
+define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_s64:
+ %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
+ ret <1 x i64> %vclz1.i
+}
+
define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u8:
; CHECK: clz.16b v0, v0
@@ -96,12 +108,28 @@ define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
ret <4 x i32> %vclz1.i
}
+define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_u64:
+ %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
+ ret <2 x i64> %vclz1.i
+}
+
+define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_s64:
+ %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
+ ret <2 x i64> %vclz1.i
+}
+
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
+
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>, i1) nounwind readnone
+
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vecCmpBr.ll b/test/CodeGen/AArch64/arm64-vecCmpBr.ll
index c7321e4b7d07..0c496fedfc2a 100644
--- a/test/CodeGen/AArch64/arm64-vecCmpBr.ll
+++ b/test/CodeGen/AArch64/arm64-vecCmpBr.ll
@@ -59,7 +59,7 @@ define i32 @anyNonZero64(<4 x i16> %a) #0 {
; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
+; CHECK-NEXT: mov w0, #0
entry:
%0 = bitcast <4 x i16> %a to <8 x i8>
@@ -83,7 +83,7 @@ define i32 @anyNonZero128(<8 x i16> %a) #0 {
; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
+; CHECK-NEXT: mov w0, #0
entry:
%0 = bitcast <8 x i16> %a to <16 x i8>
%vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3
@@ -152,7 +152,7 @@ define i32 @allNonZero64(<4 x i16> %a) #0 {
; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
+; CHECK-NEXT: mov w0, #0
entry:
%0 = bitcast <4 x i16> %a to <8 x i8>
%vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3
@@ -175,7 +175,7 @@ define i32 @allNonZero128(<8 x i16> %a) #0 {
; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
+; CHECK-NEXT: mov w0, #0
entry:
%0 = bitcast <8 x i16> %a to <16 x i8>
%vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3
diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll
index 921cf6a6f0d1..241c3dcb9825 100644
--- a/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
;CHECK: @func30
-;CHECK: movi.4h v1, #0x1
+;CHECK: movi.4h v1, #1
;CHECK: and.8b v0, v0, v1
;CHECK: ushll.4s v0, v0, #0
;CHECK: str q0, [x0]
diff --git a/test/CodeGen/AArch64/arm64-vector-imm.ll b/test/CodeGen/AArch64/arm64-vector-imm.ll
index d3de88d2049d..aa3ffd261d4b 100644
--- a/test/CodeGen/AArch64/arm64-vector-imm.ll
+++ b/test/CodeGen/AArch64/arm64-vector-imm.ll
@@ -50,35 +50,35 @@ define <2 x double> @foo(<2 x double> %bar) nounwind {
define <4 x i32> @movi_4s_imm_t1() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_4s_imm_t1:
-; CHECK: movi.4s v0, #0x4b
+; CHECK: movi.4s v0, #75
ret <4 x i32> <i32 75, i32 75, i32 75, i32 75>
}
define <4 x i32> @movi_4s_imm_t2() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_4s_imm_t2:
-; CHECK: movi.4s v0, #0x4b, lsl #8
+; CHECK: movi.4s v0, #75, lsl #8
ret <4 x i32> <i32 19200, i32 19200, i32 19200, i32 19200>
}
define <4 x i32> @movi_4s_imm_t3() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_4s_imm_t3:
-; CHECK: movi.4s v0, #0x4b, lsl #16
+; CHECK: movi.4s v0, #75, lsl #16
ret <4 x i32> <i32 4915200, i32 4915200, i32 4915200, i32 4915200>
}
define <4 x i32> @movi_4s_imm_t4() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_4s_imm_t4:
-; CHECK: movi.4s v0, #0x4b, lsl #24
+; CHECK: movi.4s v0, #75, lsl #24
ret <4 x i32> <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200>
}
define <8 x i16> @movi_8h_imm_t5() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_8h_imm_t5:
-; CHECK: movi.8h v0, #0x4b
+; CHECK: movi.8h v0, #75
ret <8 x i16> <i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16 75>
}
@@ -86,28 +86,28 @@ entry:
define <8 x i16> @movi_8h_imm_t6() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_8h_imm_t6:
-; CHECK: movi.8h v0, #0x4b, lsl #8
+; CHECK: movi.8h v0, #75, lsl #8
ret <8 x i16> <i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200>
}
define <4 x i32> @movi_4s_imm_t7() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_4s_imm_t7:
-; CHECK: movi.4s v0, #0x4b, msl #8
+; CHECK: movi.4s v0, #75, msl #8
ret <4 x i32> <i32 19455, i32 19455, i32 19455, i32 19455>
}
define <4 x i32> @movi_4s_imm_t8() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_4s_imm_t8:
-; CHECK: movi.4s v0, #0x4b, msl #16
+; CHECK: movi.4s v0, #75, msl #16
ret <4 x i32> <i32 4980735, i32 4980735, i32 4980735, i32 4980735>
}
define <16 x i8> @movi_16b_imm_t9() nounwind readnone ssp {
entry:
; CHECK-LABEL: movi_16b_imm_t9:
-; CHECK: movi.16b v0, #0x4b
+; CHECK: movi.16b v0, #75
ret <16 x i8> <i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75,
i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75>
}
diff --git a/test/CodeGen/AArch64/arm64-virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll
index 703d81a8d4fe..4ecfde4f83e2 100644
--- a/test/CodeGen/AArch64/arm64-virtual_base.ll
+++ b/test/CodeGen/AArch64/arm64-virtual_base.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -march arm64 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -disable-post-ra | FileCheck %s
; <rdar://13463602>
%struct.Counter_Struct = type { i64, i64 }
diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll
index d5a12483db40..b5a6788979e2 100644
--- a/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1315,7 +1315,7 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli8b_1:
-;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
+;CHECK: movi.8b [[REG:v[0-9]+]], #8
;CHECK: uqshl.8b v0, v0, [[REG]]
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
diff --git a/test/CodeGen/AArch64/arm64-vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll
index 15ea21b7638d..b4f57675ace3 100644
--- a/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -36,7 +36,7 @@ bb:
}
; CHECK: test3
-; CHECK: movi.4s v{{[0-9]+}}, #0x1
+; CHECK: movi.4s v{{[0-9]+}}, #1
define <16 x i1> @test3(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index 349bb6fd78af..ae77f7e099db 100644
--- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -1,44 +1,52 @@
-; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL
+
; rdar://11481771
; rdar://13713797
define void @t1() nounwind ssp {
entry:
-; CHECK-LABEL: t1:
-; CHECK-NOT: fmov
-; CHECK: movi.2d v0, #0000000000000000
-; CHECK: movi.2d v1, #0000000000000000
-; CHECK: movi.2d v2, #0000000000000000
-; CHECK: movi.2d v3, #0000000000000000
+; ALL-LABEL: t1:
+; ALL-NOT: fmov
+; CYCLONE: movi.2d v0, #0000000000000000
+; CYCLONE: movi.2d v1, #0000000000000000
+; CYCLONE: movi.2d v2, #0000000000000000
+; CYCLONE: movi.2d v3, #0000000000000000
+; KRYO: movi v0.2d, #0000000000000000
+; KRYO: movi v1.2d, #0000000000000000
+; KRYO: movi v2.2d, #0000000000000000
+; KRYO: movi v3.2d, #0000000000000000
tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind
ret void
}
define void @t2() nounwind ssp {
entry:
-; CHECK-LABEL: t2:
-; CHECK-NOT: mov w0, wzr
-; CHECK: movz w0, #0
-; CHECK: movz w1, #0
+; ALL-LABEL: t2:
+; ALL-NOT: mov w0, wzr
+; ALL: mov w0, #0
+; ALL: mov w1, #0
tail call void @bari(i32 0, i32 0) nounwind
ret void
}
define void @t3() nounwind ssp {
entry:
-; CHECK-LABEL: t3:
-; CHECK-NOT: mov x0, xzr
-; CHECK: movz x0, #0
-; CHECK: movz x1, #0
+; ALL-LABEL: t3:
+; ALL-NOT: mov x0, xzr
+; ALL: mov x0, #0
+; ALL: mov x1, #0
tail call void @barl(i64 0, i64 0) nounwind
ret void
}
define void @t4() nounwind ssp {
-; CHECK-LABEL: t4:
-; CHECK-NOT: fmov
-; CHECK: movi.2d v0, #0000000000000000
-; CHECK: movi.2d v1, #0000000000000000
+; ALL-LABEL: t4:
+; ALL-NOT: fmov
+; CYCLONE: movi.2d v0, #0000000000000000
+; CYCLONE: movi.2d v1, #0000000000000000
+; KRYO: movi v0.2d, #0000000000000000
+; KRYO: movi v1.2d, #0000000000000000
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
ret void
}
@@ -47,3 +55,29 @@ declare void @bar(double, double, double, double)
declare void @bari(i32, i32)
declare void @barl(i64, i64)
declare void @barf(float, float)
+
+; We used to produce spills+reloads for a Q register with zero cycle zeroing
+; enabled.
+; ALL-LABEL: foo:
+; ALL-NOT: str {{q[0-9]+}}
+; ALL-NOT: ldr {{q[0-9]+}}
+define double @foo(i32 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %phi0 = phi double [ 1.0, %entry ], [ %v0, %for.body ]
+ %i.076 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %conv21 = sitofp i32 %i.076 to double
+ %call = tail call fast double @sin(double %conv21)
+ %cmp.i = fcmp fast olt double %phi0, %call
+ %v0 = select i1 %cmp.i, double %call, double %phi0
+ %inc = add nuw nsw i32 %i.076, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret double %v0
+}
+
+declare double @sin(double)
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 900d2072925f..9fac8d8a868a 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
diff --git a/test/CodeGen/AArch64/bitfield-extract.ll b/test/CodeGen/AArch64/bitfield-extract.ll
new file mode 100644
index 000000000000..5e727b669e22
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-extract.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; CHECK-LABEL: @test1
+; CHECK: sbfx {{x[0-9]+}}, x0, #23, #9
+define i64 @test1(i32 %a) {
+ %tmp = ashr i32 %a, 23
+ %ext = sext i32 %tmp to i64
+ %res = add i64 %ext, 1
+ ret i64 %res
+}
+
+; CHECK-LABEL: @test2
+; CHECK: sbfx w0, w0, #23, #8
+define signext i8 @test2(i32 %a) {
+ %tmp = ashr i32 %a, 23
+ %res = trunc i32 %tmp to i8
+ ret i8 %res
+}
+
+; CHECK-LABEL: @test3
+; CHECK: sbfx w0, w0, #23, #8
+define signext i8 @test3(i32 %a) {
+ %tmp = lshr i32 %a, 23
+ %res = trunc i32 %tmp to i8
+ ret i8 %res
+}
+
+; CHECK-LABEL: @test4
+; CHECK: sbfx w0, w0, #15, #16
+define signext i16 @test4(i32 %a) {
+ %tmp = lshr i32 %a, 15
+ %res = trunc i32 %tmp to i16
+ ret i16 %res
+}
+
+; CHECK-LABEL: @test5
+; CHECK: sbfx w0, w0, #16, #8
+define signext i8 @test5(i64 %a) {
+ %tmp = lshr i64 %a, 16
+ %res = trunc i64 %tmp to i8
+ ret i8 %res
+}
+
+; CHECK-LABEL: @test6
+; CHECK: sbfx x0, x0, #30, #8
+define signext i8 @test6(i64 %a) {
+ %tmp = lshr i64 %a, 30
+ %res = trunc i64 %tmp to i8
+ ret i8 %res
+}
+
+; CHECK-LABEL: @test7
+; CHECK: sbfx x0, x0, #23, #16
+define signext i16 @test7(i64 %a) {
+ %tmp = lshr i64 %a, 23
+ %res = trunc i64 %tmp to i16
+ ret i16 %res
+}
+
+; CHECK-LABEL: @test8
+; CHECK: asr w0, w0, #25
+define signext i8 @test8(i32 %a) {
+ %tmp = ashr i32 %a, 25
+ %res = trunc i32 %tmp to i8
+ ret i8 %res
+}
+
+; CHECK-LABEL: @test9
+; CHECK: lsr w0, w0, #25
+define signext i8 @test9(i32 %a) {
+ %tmp = lshr i32 %a, 25
+ %res = trunc i32 %tmp to i8
+ ret i8 %res
+}
+
+; CHECK-LABEL: @test10
+; CHECK: lsr x0, x0, #49
+define signext i16 @test10(i64 %a) {
+ %tmp = lshr i64 %a, 49
+ %res = trunc i64 %tmp to i16
+ ret i16 %res
+}
+
+; SHR with multiple uses is fine as SXTH and SBFX are both aliases of SBFM.
+; However, allowing the transformation means the SHR and SBFX can execute in
+; parallel.
+;
+; CHECK-LABEL: @test11
+; CHECK: lsr x1, x0, #23
+; CHECK: sbfx x0, x0, #23, #16
+define void @test11(i64 %a) {
+ %tmp = lshr i64 %a, 23
+ %res = trunc i64 %tmp to i16
+ call void @use(i16 %res, i64 %tmp)
+ ret void
+}
+
+declare void @use(i16 signext, i64)
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
index 509b547a5c82..735be244d457 100644
--- a/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
; First, a simple example from Clang. The registers could plausibly be
; different, but probably won't be.
@@ -237,3 +237,246 @@ define i32 @test_nouseful_bits(i8 %a, i32 %b) {
%shl.4 = shl i32 %or.3, 8 ; A A A 0
ret i32 %shl.4
}
+
+define void @test_nouseful_strb(i32* %ptr32, i8* %ptr8, i32 %x) {
+entry:
+; CHECK-LABEL: @test_nouseful_strb
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xf8
+; CHECK-NEXT: bfxil [[REG1]], w2, #16, #3
+; CHECK-NEXT: strb [[REG1]],
+; CHECK-NEXT: ret
+ %0 = load i32, i32* %ptr32, align 8
+ %and = and i32 %0, -8
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ %trunc = trunc i32 %or to i8
+ store i8 %trunc, i8* %ptr8
+ ret void
+}
+
+define void @test_nouseful_strh(i32* %ptr32, i16* %ptr16, i32 %x) {
+entry:
+; CHECK-LABEL: @test_nouseful_strh
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0
+; CHECK-NEXT: bfxil [[REG1]], w2, #16, #4
+; CHECK-NEXT: strh [[REG1]],
+; CHECK-NEXT: ret
+ %0 = load i32, i32* %ptr32, align 8
+ %and = and i32 %0, -16
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 15
+ %or = or i32 %and, %and1
+ %trunc = trunc i32 %or to i16
+ store i16 %trunc, i16* %ptr16
+ ret void
+}
+
+define void @test_nouseful_sturb(i32* %ptr32, i8* %ptr8, i32 %x) {
+entry:
+; CHECK-LABEL: @test_nouseful_sturb
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xf8
+; CHECK-NEXT: bfxil [[REG1]], w2, #16, #3
+; CHECK-NEXT: sturb [[REG1]],
+; CHECK-NEXT: ret
+ %0 = load i32, i32* %ptr32, align 8
+ %and = and i32 %0, -8
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ %trunc = trunc i32 %or to i8
+ %gep = getelementptr i8, i8* %ptr8, i64 -1
+ store i8 %trunc, i8* %gep
+ ret void
+}
+
+define void @test_nouseful_sturh(i32* %ptr32, i16* %ptr16, i32 %x) {
+entry:
+; CHECK-LABEL: @test_nouseful_sturh
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0
+; CHECK-NEXT: bfxil [[REG1]], w2, #16, #4
+; CHECK-NEXT: sturh [[REG1]],
+; CHECK-NEXT: ret
+ %0 = load i32, i32* %ptr32, align 8
+ %and = and i32 %0, -16
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 15
+ %or = or i32 %and, %and1
+ %trunc = trunc i32 %or to i16
+ %gep = getelementptr i16, i16* %ptr16, i64 -1
+ store i16 %trunc, i16* %gep
+ ret void
+}
+
+; The next set of tests generate a BFXIL from 'or (and X, Mask0Imm),
+; (and Y, Mask1Imm)' iff Mask0Imm and ~Mask1Imm are equivalent and one of the
+; MaskImms is a shifted mask (e.g., 0x000ffff0).
+
+; CHECK-LABEL: @test_or_and_and1
+; CHECK: lsr w8, w1, #4
+; CHECK: bfi w0, w8, #4, #12
+define i32 @test_or_and_and1(i32 %a, i32 %b) {
+entry:
+ %and = and i32 %a, -65521 ; 0xffff000f
+ %and1 = and i32 %b, 65520 ; 0x0000fff0
+ %or = or i32 %and1, %and
+ ret i32 %or
+}
+
+; CHECK-LABEL: @test_or_and_and2
+; CHECK: lsr w8, w0, #4
+; CHECK: bfi w1, w8, #4, #12
+define i32 @test_or_and_and2(i32 %a, i32 %b) {
+entry:
+ %and = and i32 %a, 65520 ; 0x0000fff0
+ %and1 = and i32 %b, -65521 ; 0xffff000f
+ %or = or i32 %and1, %and
+ ret i32 %or
+}
+
+; CHECK-LABEL: @test_or_and_and3
+; CHECK: lsr x8, x1, #16
+; CHECK: bfi x0, x8, #16, #32
+define i64 @test_or_and_and3(i64 %a, i64 %b) {
+entry:
+ %and = and i64 %a, -281474976645121 ; 0xffff00000000ffff
+ %and1 = and i64 %b, 281474976645120 ; 0x0000ffffffff0000
+ %or = or i64 %and1, %and
+ ret i64 %or
+}
+
+; Don't convert 'and' with multiple uses.
+; CHECK-LABEL: @test_or_and_and4
+; CHECK: and w8, w0, #0xffff000f
+; CHECK: and w9, w1, #0xfff0
+; CHECK: orr w0, w9, w8
+; CHECK: str w8, [x2
+define i32 @test_or_and_and4(i32 %a, i32 %b, i32* %ptr) {
+entry:
+ %and = and i32 %a, -65521
+ store i32 %and, i32* %ptr, align 4
+ %and2 = and i32 %b, 65520
+ %or = or i32 %and2, %and
+ ret i32 %or
+}
+
+; Don't convert 'and' with multiple uses.
+; CHECK-LABEL: @test_or_and_and5
+; CHECK: and w8, w1, #0xfff0
+; CHECK: and w9, w0, #0xffff000f
+; CHECK: orr w0, w8, w9
+; CHECK: str w8, [x2]
+define i32 @test_or_and_and5(i32 %a, i32 %b, i32* %ptr) {
+entry:
+ %and = and i32 %b, 65520
+ store i32 %and, i32* %ptr, align 4
+ %and1 = and i32 %a, -65521
+ %or = or i32 %and, %and1
+ ret i32 %or
+}
+
+; CHECK-LABEL: @test1
+; CHECK: mov [[REG:w[0-9]+]], #5
+; CHECK: bfxil w0, [[REG]], #0, #4
+define i32 @test1(i32 %a) {
+ %1 = and i32 %a, -16 ; 0xfffffff0
+ %2 = or i32 %1, 5 ; 0x00000005
+ ret i32 %2
+}
+
+; CHECK-LABEL: @test2
+; CHECK: mov [[REG:w[0-9]+]], #10
+; CHECK: bfi w0, [[REG]], #22, #4
+define i32 @test2(i32 %a) {
+ %1 = and i32 %a, -62914561 ; 0xfc3fffff
+ %2 = or i32 %1, 41943040 ; 0x06400000
+ ret i32 %2
+}
+
+; CHECK-LABEL: @test3
+; CHECK: mov [[REG:x[0-9]+]], #5
+; CHECK: bfxil x0, [[REG]], #0, #3
+define i64 @test3(i64 %a) {
+ %1 = and i64 %a, -8 ; 0xfffffffffffffff8
+ %2 = or i64 %1, 5 ; 0x0000000000000005
+ ret i64 %2
+}
+
+; CHECK-LABEL: @test4
+; CHECK: mov [[REG:x[0-9]+]], #9
+; CHECK: bfi x0, [[REG]], #1, #7
+define i64 @test4(i64 %a) {
+ %1 = and i64 %a, -255 ; 0xffffffffffffff01
+ %2 = or i64 %1, 18 ; 0x0000000000000012
+ ret i64 %2
+}
+
+; Don't generate BFI/BFXIL if the immediate can be encoded in the ORR.
+; CHECK-LABEL: @test5
+; CHECK: and [[REG:w[0-9]+]], w0, #0xfffffff0
+; CHECK: orr w0, [[REG]], #0x6
+define i32 @test5(i32 %a) {
+ %1 = and i32 %a, 4294967280 ; 0xfffffff0
+ %2 = or i32 %1, 6 ; 0x00000006
+ ret i32 %2
+}
+
+; BFXIL will use the same constant as the ORR, so we don't care how the constant
+; is materialized (it's an equal cost either way).
+; CHECK-LABEL: @test6
+; CHECK: mov [[REG:w[0-9]+]], #720896
+; CHECK: movk [[REG]], #23250
+; CHECK: bfxil w0, [[REG]], #0, #20
+define i32 @test6(i32 %a) {
+ %1 = and i32 %a, 4293918720 ; 0xfff00000
+ %2 = or i32 %1, 744146 ; 0x000b5ad2
+ ret i32 %2
+}
+
+; BFIs that require the same number of instruction to materialize the constant
+; as the original ORR are okay.
+; CHECK-LABEL: @test7
+; CHECK: mov [[REG:w[0-9]+]], #327680
+; CHECK: movk [[REG]], #44393
+; CHECK: bfi w0, [[REG]], #1, #19
+define i32 @test7(i32 %a) {
+ %1 = and i32 %a, 4293918721 ; 0xfff00001
+ %2 = or i32 %1, 744146 ; 0x000b5ad2
+ ret i32 %2
+}
+
+; BFIs that require more instructions to materialize the constant as compared
+; to the original ORR are not okay. In this case we would be replacing the
+; 'and' with a 'movk', which would decrease ILP while using the same number of
+; instructions.
+; CHECK-LABEL: @test8
+; CHECK: mov [[REG2:x[0-9]+]], #157599529959424
+; CHECK: and [[REG1:x[0-9]+]], x0, #0xff000000000000ff
+; CHECK: movk [[REG2]], #31059, lsl #16
+; CHECK: orr x0, [[REG1]], [[REG2]]
+define i64 @test8(i64 %a) {
+ %1 = and i64 %a, -72057594037927681 ; 0xff000000000000ff
+ %2 = or i64 %1, 157601565442048 ; 0x00008f5679530000
+ ret i64 %2
+}
+
+; This test exposed an issue with an overly aggressive assert. The bit of code
+; that is expected to catch this case is unable to deal with the trunc, which
+; results in a failing check due to a mismatch between the BFI opcode and
+; the expected value type of the OR.
+; CHECK-LABEL: @test9
+; CHECK: lsr x0, x0, #12
+; CHECK: lsr [[REG:w[0-9]+]], w1, #23
+; CHECK: bfi w0, [[REG]], #23, #9
+define i32 @test9(i64 %b, i32 %e) {
+ %c = lshr i64 %b, 12
+ %d = trunc i64 %c to i32
+ %f = and i32 %d, 8388607
+ %g = and i32 %e, -8388608
+ %h = or i32 %g, %f
+ ret i32 %h
+}
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
index 5f19b6943b8e..8bd1279544b8 100644
--- a/test/CodeGen/AArch64/bitfield.ll
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
@var32 = global i32 0
@var64 = global i64 0
diff --git a/test/CodeGen/AArch64/bitreverse.ll b/test/CodeGen/AArch64/bitreverse.ll
index 936e3554b397..2eee7cfd8b97 100644
--- a/test/CodeGen/AArch64/bitreverse.ll
+++ b/test/CodeGen/AArch64/bitreverse.ll
@@ -7,6 +7,7 @@ declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
define <2 x i16> @f(<2 x i16> %a) {
; CHECK-LABEL: f:
+; CHECK: rev32
; CHECK: ushr
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
ret <2 x i16> %b
@@ -47,14 +48,14 @@ define <8 x i8> @g_vec(<8 x i8> %a) {
; Try and match as much of the sequence as precisely as possible.
; CHECK-LABEL: g_vec:
-; CHECK-DAG: movi [[M1:v.*]], #0x80
-; CHECK-DAG: movi [[M2:v.*]], #0x40
-; CHECK-DAG: movi [[M3:v.*]], #0x20
-; CHECK-DAG: movi [[M4:v.*]], #0x10
-; CHECK-DAG: movi [[M5:v.*]], #0x8
-; CHECK-DAG: movi [[M6:v.*]], #0x4{{$}}
-; CHECK-DAG: movi [[M7:v.*]], #0x2{{$}}
-; CHECK-DAG: movi [[M8:v.*]], #0x1{{$}}
+; CHECK-DAG: movi [[M1:v.*]], #128
+; CHECK-DAG: movi [[M2:v.*]], #64
+; CHECK-DAG: movi [[M3:v.*]], #32
+; CHECK-DAG: movi [[M4:v.*]], #16
+; CHECK-DAG: movi [[M5:v.*]], #8{{$}}
+; CHECK-DAG: movi [[M6:v.*]], #4{{$}}
+; CHECK-DAG: movi [[M7:v.*]], #2{{$}}
+; CHECK-DAG: movi [[M8:v.*]], #1{{$}}
; CHECK-DAG: shl [[S1:v.*]], v0.8b, #7
; CHECK-DAG: shl [[S2:v.*]], v0.8b, #5
; CHECK-DAG: shl [[S3:v.*]], v0.8b, #3
diff --git a/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
index 3f9c0239fe41..e3af90ae4831 100644
--- a/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
+++ b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu -stop-after branch-folder -o /dev/null < %s | FileCheck %s
+; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu -stop-after branch-folder -o - < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; Function Attrs: norecurse nounwind
diff --git a/test/CodeGen/AArch64/bswap-known-bits.ll b/test/CodeGen/AArch64/bswap-known-bits.ll
new file mode 100644
index 000000000000..e5de7953d1b8
--- /dev/null
+++ b/test/CodeGen/AArch64/bswap-known-bits.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=aarch64-apple-darwin | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+
+; CHECK-LABEL: @test1
+; CHECK: orr w0, wzr, #0x1
+define i1 @test1(i16 %arg) {
+ %a = or i16 %arg, 511
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+; CHECK-LABEL: @test2
+; CHECK: orr w0, wzr, #0x1
+define i1 @test2(i16 %arg) {
+ %a = or i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+; CHECK-LABEL: @test3
+; CHECK: orr w0, wzr, #0x1
+define i1 @test3(i16 %arg) {
+ %a = or i16 %arg, 256
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 1
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
+
+; CHECK-LABEL: @test4
+; CHECK: orr w0, wzr, #0x1
+define i1 @test4(i32 %arg) {
+ %a = or i32 %arg, 2147483647 ; i32_MAX
+ %b = call i32 @llvm.bswap.i32(i32 %a)
+ %and = and i32 %b, 127
+ %res = icmp eq i32 %and, 127
+ ret i1 %res
+}
diff --git a/test/CodeGen/AArch64/cmpxchg-O0.ll b/test/CodeGen/AArch64/cmpxchg-O0.ll
new file mode 100644
index 000000000000..c79d82a63774
--- /dev/null
+++ b/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -0,0 +1,75 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 %s -o - | FileCheck %s
+
+define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_8:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], w1, uxtb
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxrb [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK: cset {{w[0-9]+}}, eq
+ %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
+ ret { i8, i1 } %res
+}
+
+define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_16:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], w1, uxth
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK: cset {{w[0-9]+}}, eq
+ %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
+ ret { i16, i1 } %res
+}
+
+define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_32:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], w1
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxr [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK: cset {{w[0-9]+}}, eq
+ %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+ ret { i32, i1 } %res
+}
+
+define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_64:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[OLD:x[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], x1
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxr [[STATUS:w[3-9]]], x2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{x[0-9]+}}, [[OLD]], x1
+; CHECK: cset {{w[0-9]+}}, eq
+ %res = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst monotonic
+ ret { i64, i1 } %res
+}
+
+define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_128:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
+; CHECK: cmp [[OLD_LO]], x2
+; CHECK: sbcs xzr, [[OLD_HI]], x3
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+ %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic
+ ret { i128, i1 } %res
+}
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 004267f4e4e0..1f8e0efa0675 100644
--- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -405,11 +405,11 @@ return: ; preds = %land.lhs.true, %con
define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) {
; CHECK-LABEL: cmp_shifted:
-; CHECK: cmp w0, #1
-; [...]
; CHECK: cmp w0, #2, lsl #12
+; [...]
+; CHECK: cmp w0, #1
- %tst_low = icmp sgt i32 %in, 0
+ %tst_low = icmp sgt i32 %in, 8191
br i1 %tst_low, label %true, label %false
true:
@@ -417,7 +417,7 @@ true:
ret void
false:
- %tst = icmp sgt i32 %in, 8191
+ %tst = icmp sgt i32 %in, 0
br i1 %tst, label %truer, label %falser
truer:
@@ -429,6 +429,42 @@ falser:
ret void
}
+define i32 @combine_gt_ge_sel(i64 %v, i64* %p) #0 {
+; CHECK-LABEL: combine_gt_ge_sel
+; CHECK: ldr [[reg1:w[0-9]*]],
+; CHECK: cmp [[reg1]], #0
+; CHECK: csel {{.*}}, gt
+entry:
+ %0 = load i32, i32* @a, align 4
+ %cmp = icmp sgt i32 %0, 0
+ %m = select i1 %cmp, i64 %v, i64 0
+ store i64 %m, i64* %p
+ br i1 %cmp, label %lor.lhs.false, label %land.lhs.true
+
+land.lhs.true: ; preds = %entry
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
+ %cmp1 = icmp eq i32 %1, %2
+ br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false: ; preds = %entry
+ %cmp2 = icmp sgt i32 %0, 1
+ br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
+ %cmp4 = icmp eq i32 %3, %4
+ br i1 %cmp4, label %return, label %if.end
+
+if.end: ; preds = %land.lhs.true3, %lor.lhs.false
+ br label %return
+
+return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+ %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+ ret i32 %retval.0
+}
+
declare i32 @zoo(i32)
declare double @yoo(i32)
diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll
index 1102553ab551..227c626ba15d 100644
--- a/test/CodeGen/AArch64/complex-int-to-fp.ll
+++ b/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -155,7 +155,7 @@ define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
}
define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v4i8_to_v4f32
-; CHECK: bic.4h v0, #0xff, lsl #8
+; CHECK: bic.4h v0, #255, lsl #8
; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
; CHECK: ucvtf.4s v0, [[VAL32]]
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
index dfc83aacfcfc..b39cea1f6192 100644
--- a/test/CodeGen/AArch64/cond-sel.ll
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
@var32 = global i32 0
@@ -10,8 +10,8 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
%tst1 = icmp ugt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, i32 42, i32 52
store i32 %val1, i32* @var32
-; CHECK-DAG: movz [[W52:w[0-9]+]], #{{52|0x34}}
-; CHECK-DAG: movz [[W42:w[0-9]+]], #{{42|0x2a}}
+; CHECK-DAG: mov [[W52:w[0-9]+]], #{{52|0x34}}
+; CHECK-DAG: mov [[W42:w[0-9]+]], #{{42|0x2a}}
; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi
%rhs64 = sext i32 %rhs32 to i64
@@ -34,8 +34,8 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r
; CHECK-NOFP-NOT: fcmp
%val1 = select i1 %tst1, i32 42, i32 52
store i32 %val1, i32* @var32
-; CHECK: movz [[W52:w[0-9]+]], #{{52|0x34}}
-; CHECK: movz [[W42:w[0-9]+]], #{{42|0x2a}}
+; CHECK: mov [[W52:w[0-9]+]], #{{52|0x34}}
+; CHECK: mov [[W42:w[0-9]+]], #{{42|0x2a}}
; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi
; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt
@@ -46,7 +46,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r
%val2 = select i1 %tst2, i64 9, i64 15
store i64 %val2, i64* @var64
; CHECK: orr w[[CONST15:[0-9]+]], wzr, #0xf
-; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}}
+; CHECK: mov {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}}
; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq
; CHECK: csel {{x[0-9]+}}, x[[CONST9]], [[MAYBETRUE]], vs
@@ -135,6 +135,34 @@ define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
; CHECK: ret
}
+define void @test_csinv0(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) minsize {
+; CHECK-LABEL: test_csinv0:
+
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, i32 0, i32 -1
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csetm {{w[0-9]+}}, ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %val2 = select i1 %tst2, i32 -1, i32 %rhs2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS2:w[0-9]+]], [[RHS2:w[0-9]+]]
+; CHECK: csinv {{w[0-9]+}}, [[RHS2]], wzr, gt
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = mul i64 %rhs64, 19
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %val3 = select i1 %tst3, i64 %rhs3, i64 -1
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS3:x[0-9]+]], [[RHS3:x[0-9]+]]
+; CHECK: csinv {{x[0-9]+}}, [[RHS3]], xzr, hi
+
+ ret void
+; CHECK: ret
+}
+
define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
; CHECK-LABEL: test_csneg:
diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll
index 9c2a4fd55d1b..3296e38b64f4 100644
--- a/test/CodeGen/AArch64/cpus.ll
+++ b/test/CodeGen/AArch64/cpus.ll
@@ -6,7 +6,10 @@
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a73 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m1 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=vulcan 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
; CHECK-NOT: {{.*}} is not a recognized processor for this target
diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll
index 9996c0d3aba8..a36aad51ca82 100644
--- a/test/CodeGen/AArch64/cxx-tlscc.ll
+++ b/test/CodeGen/AArch64/cxx-tlscc.ll
@@ -1,8 +1,10 @@
; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s
+; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck %s
; Shrink wrapping currently does not kick in because we have a TLS CALL
; in the entry block and it will clobber the link register.
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s
+
%struct.S = type { i8 }
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@@ -42,7 +44,9 @@ __tls_init.exit:
; CHECK-NOT: stp d3, d2
; CHECK-NOT: stp d1, d0
; CHECK-NOT: stp x20, x19
-; CHECK-NOT: stp x14, x13
+; FIXME: The splitting logic in the register allocator fails to split along
+; control flow here, we used to get this right by accident before...
+; CHECK-NOTXX: stp x14, x13
; CHECK-NOT: stp x12, x11
; CHECK-NOT: stp x10, x9
; CHECK-NOT: stp x8, x7
@@ -61,7 +65,7 @@ __tls_init.exit:
; CHECK-NOT: ldp x8, x7
; CHECK-NOT: ldp x10, x9
; CHECK-NOT: ldp x12, x11
-; CHECK-NOT: ldp x14, x13
+; CHECK-NOTXX: ldp x14, x13
; CHECK-NOT: ldp x20, x19
; CHECK-NOT: ldp d1, d0
; CHECK-NOT: ldp d3, d2
@@ -76,6 +80,52 @@ __tls_init.exit:
; CHECK-NOT: ldp d29, d28
; CHECK-NOT: ldp d31, d30
+; CHECK-O0-LABEL: _ZTW2sg
+; CHECK-O0: stp d31, d30
+; CHECK-O0: stp d29, d28
+; CHECK-O0: stp d27, d26
+; CHECK-O0: stp d25, d24
+; CHECK-O0: stp d23, d22
+; CHECK-O0: stp d21, d20
+; CHECK-O0: stp d19, d18
+; CHECK-O0: stp d17, d16
+; CHECK-O0: stp d7, d6
+; CHECK-O0: stp d5, d4
+; CHECK-O0: stp d3, d2
+; CHECK-O0: stp d1, d0
+; CHECK-O0: stp x14, x13
+; CHECK-O0: stp x12, x11
+; CHECK-O0: stp x10, x9
+; CHECK-O0: stp x8, x7
+; CHECK-O0: stp x6, x5
+; CHECK-O0: stp x4, x3
+; CHECK-O0: stp x2, x1
+; CHECK-O0: blr
+; CHECK-O0: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
+; CHECK-O0: blr
+; CHECK-O0: tlv_atexit
+; CHECK-O0: [[BB_end]]:
+; CHECK-O0: blr
+; CHECK-O0: ldp x2, x1
+; CHECK-O0: ldp x4, x3
+; CHECK-O0: ldp x6, x5
+; CHECK-O0: ldp x8, x7
+; CHECK-O0: ldp x10, x9
+; CHECK-O0: ldp x12, x11
+; CHECK-O0: ldp x14, x13
+; CHECK-O0: ldp d1, d0
+; CHECK-O0: ldp d3, d2
+; CHECK-O0: ldp d5, d4
+; CHECK-O0: ldp d7, d6
+; CHECK-O0: ldp d17, d16
+; CHECK-O0: ldp d19, d18
+; CHECK-O0: ldp d21, d20
+; CHECK-O0: ldp d23, d22
+; CHECK-O0: ldp d25, d24
+; CHECK-O0: ldp d27, d26
+; CHECK-O0: ldp d29, d28
+; CHECK-O0: ldp d31, d30
+
; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: stp d31, d30
; CHECK-NOT: stp d29, d28
@@ -98,6 +148,77 @@ __tls_init.exit:
; CHECK-NOT: stp x4, x3
; CHECK-NOT: stp x2, x1
; CHECK: blr
+
+; CHECK-O0-LABEL: _ZTW4sum1
+; CHECK-O0-NOT: vstr
+; CHECK-O0-NOT: vldr
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
ret i32* @sum1
}
+
+; Make sure at O0, we don't generate spilling/reloading of the CSRs.
+; CHECK-O0-LABEL: tls_test2
+; CHECK-O0-NOT: stp d31, d30
+; CHECK-O0-NOT: stp d29, d28
+; CHECK-O0-NOT: stp d27, d26
+; CHECK-O0-NOT: stp d25, d24
+; CHECK-O0-NOT: stp d23, d22
+; CHECK-O0-NOT: stp d21, d20
+; CHECK-O0-NOT: stp d19, d18
+; CHECK-O0-NOT: stp d17, d16
+; CHECK-O0-NOT: stp d7, d6
+; CHECK-O0-NOT: stp d5, d4
+; CHECK-O0-NOT: stp d3, d2
+; CHECK-O0-NOT: stp d1, d0
+; CHECK-O0-NOT: stp x20, x19
+; CHECK-O0-NOT: stp x14, x13
+; CHECK-O0-NOT: stp x12, x11
+; CHECK-O0-NOT: stp x10, x9
+; CHECK-O0-NOT: stp x8, x7
+; CHECK-O0-NOT: stp x6, x5
+; CHECK-O0-NOT: stp x4, x3
+; CHECK-O0-NOT: stp x2, x1
+; CHECK-O0: bl {{.*}}tls_helper
+; CHECK-O0-NOT: ldp x2, x1
+; CHECK-O0-NOT: ldp x4, x3
+; CHECK-O0-NOT: ldp x6, x5
+; CHECK-O0-NOT: ldp x8, x7
+; CHECK-O0-NOT: ldp x10, x9
+; CHECK-O0-NOT: ldp x12, x11
+; CHECK-O0-NOT: ldp x14, x13
+; CHECK-O0-NOT: ldp x20, x19
+; CHECK-O0-NOT: ldp d1, d0
+; CHECK-O0-NOT: ldp d3, d2
+; CHECK-O0-NOT: ldp d5, d4
+; CHECK-O0-NOT: ldp d7, d6
+; CHECK-O0-NOT: ldp d17, d16
+; CHECK-O0-NOT: ldp d19, d18
+; CHECK-O0-NOT: ldp d21, d20
+; CHECK-O0-NOT: ldp d23, d22
+; CHECK-O0-NOT: ldp d25, d24
+; CHECK-O0-NOT: ldp d27, d26
+; CHECK-O0-NOT: ldp d29, d28
+; CHECK-O0-NOT: ldp d31, d30
+; CHECK-O0: ret
+%class.C = type { i32 }
+@tC = internal thread_local global %class.C zeroinitializer, align 4
+declare cxx_fast_tlscc void @tls_helper()
+define cxx_fast_tlscc %class.C* @tls_test2() #1 {
+ call cxx_fast_tlscc void @tls_helper()
+ ret %class.C* @tC
+}
+
+; Make sure we do not allow tail call when caller and callee have different
+; calling conventions.
+declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this)
+; CHECK-LABEL: tls_test
+; CHECK: bl __tlv_atexit
+define cxx_fast_tlscc void @__tls_test() {
+entry:
+ store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4
+ %0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1
+ ret void
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AArch64/dag-combine-invaraints.ll b/test/CodeGen/AArch64/dag-combine-invaraints.ll
index ac2d057ff3c9..20ba3fea8377 100644
--- a/test/CodeGen/AArch64/dag-combine-invaraints.ll
+++ b/test/CodeGen/AArch64/dag-combine-invaraints.ll
@@ -24,7 +24,7 @@ main_:
ret i32 0
; CHECK: main:
-; CHECK-DAG: movz
+; CHECK-DAG: mov
; CHECK-DAG: orr
; CHECK: csel
}
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
index fbea4a6e5838..f89d7603fd3e 100644
--- a/test/CodeGen/AArch64/directcond.ll
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s
define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
diff --git a/test/CodeGen/AArch64/div_minsize.ll b/test/CodeGen/AArch64/div_minsize.ll
new file mode 100644
index 000000000000..43f12340f19f
--- /dev/null
+++ b/test/CodeGen/AArch64/div_minsize.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+define i32 @testsize1(i32 %x) minsize nounwind {
+entry:
+ %div = sdiv i32 %x, 32
+ ret i32 %div
+; CHECK-LABEL: testsize1
+; CHECK: sdiv
+}
+
+define i32 @testsize2(i32 %x) minsize nounwind {
+entry:
+ %div = sdiv i32 %x, 33
+ ret i32 %div
+; CHECK-LABEL: testsize2
+; CHECK: sdiv
+}
+
+define i32 @testsize3(i32 %x) minsize nounwind {
+entry:
+ %div = udiv i32 %x, 32
+ ret i32 %div
+; CHECK-LABEL: testsize3
+; CHECK: lsr
+}
+
+define i32 @testsize4(i32 %x) minsize nounwind {
+entry:
+ %div = udiv i32 %x, 33
+ ret i32 %div
+; CHECK-LABEL: testsize4
+; CHECK: udiv
+}
+
+define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
+entry:
+; CHECK: sdiv_vec8x16_minsize
+; CHECK: sshr v1.8h, v0.8h, #15
+; CHECK: usra v0.8h, v1.8h, #11
+; CHECK: sshr v0.8h, v0.8h, #5
+; CHECK: ret
+ %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+ ret <8 x i16> %0
+}
+
diff --git a/test/CodeGen/AArch64/emutls.ll b/test/CodeGen/AArch64/emutls.ll
index ac5762edba98..36b0ae47bd4a 100644
--- a/test/CodeGen/AArch64/emutls.ll
+++ b/test/CodeGen/AArch64/emutls.ll
@@ -1,5 +1,5 @@
; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \
-; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s
+; RUN: -relocation-model=pic -disable-fp-elim < %s | FileCheck -check-prefix=ARM64 %s
; Copied from X86/emutls.ll
@@ -54,63 +54,160 @@ entry:
ret i32* @i1
}
+define i32 @f5() nounwind {
+; ARM64-LABEL: f5:
+; ARM64: adrp x0, __emutls_v.i3
+; ARM64: add x0, x0, :lo12:__emutls_v.i3
+; ARM64: bl __emutls_get_address
+; ARM64-NEXT: ldr w0, [x0]
+
+entry:
+ %tmp1 = load i32, i32* @i3
+ ret i32 %tmp1
+}
+
+define i32* @f6() {
+; ARM64-LABEL: f6:
+; ARM64: adrp x0, __emutls_v.i3
+; ARM64: add x0, x0, :lo12:__emutls_v.i3
+; ARM64-NEXT: bl __emutls_get_address
+; ARM64-NEXT: ldp x29, x30, [sp]
+
+entry:
+ ret i32* @i3
+}
+
+; Simple test of comdat __thread variables.
+; template <class T> struct A { static __thread T x; };
+; template <class T> T __thread A<T>::x;
+; int getIntX() { return A<int>::x++; }
+; float getFloatX() { return A<float>::x++; }
+
+$_ZN1AIiE1xE = comdat any
+$_ZN1AIfE1xE = comdat any
+@_ZN1AIiE1xE = linkonce_odr thread_local global i32 0, comdat, align 4
+@_ZN1AIfE1xE = linkonce_odr thread_local global float 0.000000e+00, comdat, align 4
+
+define i32 @_Z7getIntXv() {
+; ARM64-LABEL: _Z7getIntXv:
+; ARM64: adrp x0, :got:__emutls_v._ZN1AIiE1xE
+; ARM64: ldr x0, [x0, :got_lo12:__emutls_v._ZN1AIiE1xE]
+; ARM64-NEXT: bl __emutls_get_address
+; ARM64-NEXT: ldr {{.*}}, [x0]
+; ARM64: add
+; ARM64: str {{.*}}, [x0]
+
+entry:
+ %0 = load i32, i32* @_ZN1AIiE1xE, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @_ZN1AIiE1xE, align 4
+ ret i32 %0
+}
+
+define float @_Z9getFloatXv() {
+; ARM64-LABEL: _Z9getFloatXv:
+; ARM64: adrp x0, :got:__emutls_v._ZN1AIfE1xE
+; ARM64: ldr x0, [x0, :got_lo12:__emutls_v._ZN1AIfE1xE]
+; ARM64-NEXT: bl __emutls_get_address
+; ARM64-NEXT: ldr {{.*}}, [x0]
+; ARM64: fadd s{{.*}}, s
+; ARM64: str s{{.*}}, [x0]
+
+entry:
+ %0 = load float, float* @_ZN1AIfE1xE, align 4
+ %inc = fadd float %0, 1.000000e+00
+ store float %inc, float* @_ZN1AIfE1xE, align 4
+ ret float %0
+}
+
+
;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t.
-; ARM64 .section .data.rel.local,
+; ARM64: .data{{$}}
+; ARM64: .globl __emutls_v.i1
; ARM64-LABEL: __emutls_v.i1:
; ARM64-NEXT: .xword 4
; ARM64-NEXT: .xword 4
; ARM64-NEXT: .xword 0
; ARM64-NEXT: .xword __emutls_t.i1
-; ARM64 .section .rodata,
+; ARM64: .section .rodata,
; ARM64-LABEL: __emutls_t.i1:
; ARM64-NEXT: .word 15
; ARM64-NOT: __emutls_v.i2
-; ARM64 .section .data.rel.local,
+; ARM64: .data{{$}}
+; ARM64-NOT: .globl
; ARM64-LABEL: __emutls_v.i3:
; ARM64-NEXT: .xword 4
; ARM64-NEXT: .xword 4
; ARM64-NEXT: .xword 0
; ARM64-NEXT: .xword __emutls_t.i3
-; ARM64 .section .rodata,
+; ARM64: .section .rodata,
; ARM64-LABEL: __emutls_t.i3:
; ARM64-NEXT: .word 15
-; ARM64 .section .data.rel.local,
+; ARM64: .hidden __emutls_v.i4
+; ARM64: .data{{$}}
+; ARM64: .globl __emutls_v.i4
; ARM64-LABEL: __emutls_v.i4:
; ARM64-NEXT: .xword 4
; ARM64-NEXT: .xword 4
; ARM64-NEXT: .xword 0
; ARM64-NEXT: .xword __emutls_t.i4
-; ARM64 .section .rodata,
+; ARM64: .section .rodata,
; ARM64-LABEL: __emutls_t.i4:
; ARM64-NEXT: .word 15
; ARM64-NOT: __emutls_v.i5:
-; ARM64 .hidden __emutls_v.i5
+; ARM64: .hidden __emutls_v.i5
; ARM64-NOT: __emutls_v.i5:
-; ARM64 .section .data.rel.local,
+; ARM64: .data{{$}}
+; ARM64: .globl __emutls_v.s1
; ARM64-LABEL: __emutls_v.s1:
; ARM64-NEXT: .xword 2
; ARM64-NEXT: .xword 2
; ARM64-NEXT: .xword 0
; ARM64-NEXT: .xword __emutls_t.s1
-; ARM64 .section .rodata,
+; ARM64: .section .rodata,
; ARM64-LABEL: __emutls_t.s1:
; ARM64-NEXT: .hword 15
-; ARM64 .section .data.rel.local,
+; ARM64: .data{{$}}
; ARM64-LABEL: __emutls_v.b1:
; ARM64-NEXT: .xword 1
; ARM64-NEXT: .xword 1
; ARM64-NEXT: .xword 0
; ARM64-NEXT: .xword 0
-; ARM64-NOT: __emutls_t.b1
+; ARM64-NOT: __emutls_t.b1
+
+; ARM64: .section .data.__emutls_v._ZN1AIiE1xE,{{.*}},__emutls_v._ZN1AIiE1xE,comdat
+; ARM64: .weak __emutls_v._ZN1AIiE1xE
+; ARM64: .p2align 3
+; ARM64-LABEL: __emutls_v._ZN1AIiE1xE:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword 0
+
+; ARM64: .section .data.__emutls_v._ZN1AIfE1xE,{{.*}},__emutls_v._ZN1AIfE1xE,comdat
+; ARM64: .weak __emutls_v._ZN1AIfE1xE
+; ARM64: .p2align 3
+; ARM64-LABEL: __emutls_v._ZN1AIfE1xE:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword __emutls_t._ZN1AIfE1xE
+
+; ARM64: .section .rodata.__emutls_t._ZN1AIfE1xE,{{.*}},__emutls_t._ZN1AIfE1xE,comdat
+; ARM64: .weak __emutls_t._ZN1AIfE1xE
+; ARM64: .p2align 2
+; ARM64-LABEL: __emutls_t._ZN1AIfE1xE:
+; ARM64-NEXT: .word 0
+; ARM64-NEXT: .size
diff --git a/test/CodeGen/AArch64/emutls_generic.ll b/test/CodeGen/AArch64/emutls_generic.ll
index 7664db3df8d2..03473cf80ee4 100644
--- a/test/CodeGen/AArch64/emutls_generic.ll
+++ b/test/CodeGen/AArch64/emutls_generic.ll
@@ -37,7 +37,9 @@ entry:
; ARM_64: __emutls_get_address
; ARM_64-NOT: __emutls_t.external_x
; ARM_64-NOT: __emutls_v.external_x:
-; ARM_64: .align 3
+; ARM_64: .data{{$}}
+; ARM_64: .globl __emutls_v.external_y
+; ARM_64: .p2align 3
; ARM_64-LABEL: __emutls_v.external_y:
; ARM_64-NEXT: .xword 1
; ARM_64-NEXT: .xword 2
@@ -47,8 +49,9 @@ entry:
; ARM_64: .section .rodata,
; ARM_64-LABEL: __emutls_t.external_y:
; ARM_64-NEXT: .byte 7
-; ARM_64: .data
-; ARM_64: .align 3
+; ARM_64: .data{{$}}
+; ARM_64-NOT: .globl __emutls_v
+; ARM_64: .p2align 3
; ARM_64-LABEL: __emutls_v.internal_y:
; ARM_64-NEXT: .xword 8
; ARM_64-NEXT: .xword 16
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
index 2203c0c4e698..921009cf821d 100644
--- a/test/CodeGen/AArch64/extern-weak.ll
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=static -o - < %s | FileCheck --check-prefix=CHECK-STATIC %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=static -o - < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s
declare extern_weak i32 @var()
@@ -14,11 +14,6 @@ define i32()* @foo() {
; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:var
; CHECK: ldr x0, [x[[ADDRHI]], :got_lo12:var]
-; CHECK-STATIC: .LCPI0_0:
-; CHECK-STATIC-NEXT: .xword var
-; CHECK-STATIC: adrp x[[VAR:[0-9]+]], .LCPI0_0
-; CHECK-STATIC: ldr x0, [x[[VAR]], :lo12:.LCPI0_0]
-
; In the large model, the usual relocations are absolute and can
; materialise 0.
; CHECK-LARGE: movz x0, #:abs_g3:var
@@ -38,11 +33,6 @@ define i32* @bar() {
; CHECK: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var]
; CHECK: add x0, [[BASE]], #20
-; CHECK-STATIC: .LCPI1_0:
-; CHECK-STATIC-NEXT: .xword arr_var
-; CHECK-STATIC: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, :lo12:.LCPI1_0]
-; CHECK-STATIC: add x0, [[BASE]], #20
-
ret i32* %addr
; In the large model, the usual relocations are absolute and can
@@ -61,9 +51,6 @@ define i32* @wibble() {
; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
; CHECK: add x0, [[BASE]], :lo12:defined_weak_var
-; CHECK-STATIC: adrp [[BASE:x[0-9]+]], defined_weak_var
-; CHECK-STATIC: add x0, [[BASE]], :lo12:defined_weak_var
-
; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
index e8ecb13b3564..f50504a9a260 100644
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -disable-fp-elim | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -446,6 +446,34 @@ define half @test_sitofp_i64(i64 %a) #0 {
ret half %r
}
+; CHECK-LABEL: test_uitofp_i32_fadd:
+; CHECK-NEXT: ucvtf s1, w0
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
+ %c = uitofp i32 %a to half
+ %r = fadd half %b, %c
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i32_fadd:
+; CHECK-NEXT: scvtf s1, w0
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
+ %c = sitofp i32 %a to half
+ %r = fadd half %b, %c
+ ret half %r
+}
+
; CHECK-LABEL: test_fptrunc_float:
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
@@ -695,7 +723,7 @@ define half @test_maxnum(half %a, half %b) #0 {
; CHECK-LABEL: test_copysign:
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.4s v2, #128, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
@@ -706,7 +734,7 @@ define half @test_copysign(half %a, half %b) #0 {
; CHECK-LABEL: test_copysign_f32:
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.4s v2, #128, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
@@ -719,7 +747,7 @@ define half @test_copysign_f32(half %a, float %b) #0 {
; CHECK-LABEL: test_copysign_f64:
; CHECK-NEXT: fcvt s1, d1
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.4s v2, #128, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
@@ -735,7 +763,7 @@ define half @test_copysign_f64(half %a, double %b) #0 {
; CHECK-LABEL: test_copysign_extended:
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.4s v2, #128, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
define float @test_copysign_extended(half %a, half %b) #0 {
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index e04a62b85c8e..2240296c89ff 100644
--- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -19,8 +19,8 @@ bb4:
}
; CHECK-LABEL: test_and
-; CHECK: cbz w0, {{LBB[0-9]+_2}}
-; CHECK: cbnz w1, {{LBB[0-9]+_3}}
+; CHECK: cbnz w0, {{LBB[0-9]+_2}}
+; CHECK: cbz w1, {{LBB[0-9]+_1}}
define i64 @test_and(i32 %a, i32 %b) {
bb1:
%0 = icmp ne i32 %a, 0
diff --git a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
index 2855419a1ca0..2a0139ed9b08 100644
--- a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
+++ b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
@@ -11,7 +11,7 @@ define <2 x i32> @icmp_v2i32(<2 x i32> %a) {
; CHECK: ; BB#0:
; CHECK-NEXT: cmeq.2s [[CMP:v[0-9]+]], v0, #0
; CHECK-NEXT: ; BB#1:
-; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #1
; CHECK-NEXT: and.8b v0, [[CMP]], [[MASK]]
; CHECK-NEXT: ret
%c = icmp eq <2 x i32> %a, zeroinitializer
@@ -26,7 +26,7 @@ define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) {
; CHECK: ; BB#0:
; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff
; CHECK-NEXT: ; BB#1:
-; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #1
; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]]
; CHECK-NEXT: ret
%1 = icmp eq <2 x i32> %a, %a
@@ -42,7 +42,7 @@ define <4 x i32> @icmp_v4i32(<4 x i32> %a) {
; CHECK-NEXT: cmeq.4s [[CMP:v[0-9]+]], v0, #0
; CHECK-NEXT: xtn.4h [[CMPV4I16:v[0-9]+]], [[CMP]]
; CHECK-NEXT: ; BB#1:
-; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #1
; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], [[CMPV4I16]], [[MASK]]
; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0
; CHECK-NEXT: ret
@@ -58,7 +58,7 @@ define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) {
; CHECK: ; BB#0:
; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff
; CHECK-NEXT: ; BB#1:
-; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #1
; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]]
; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0
; CHECK-NEXT: ret
@@ -74,7 +74,7 @@ define <16 x i8> @icmp_v16i8(<16 x i8> %a) {
; CHECK: ; BB#0:
; CHECK-NEXT: cmeq.16b [[CMP:v[0-9]+]], v0, #0
; CHECK-NEXT: ; BB#1:
-; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #1
; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]]
; CHECK-NEXT: ret
%c = icmp eq <16 x i8> %a, zeroinitializer
@@ -89,7 +89,7 @@ define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) {
; CHECK: ; BB#0:
; CHECK-NEXT: movi.2d [[CMP:v[0-9]+]], #0xffffffffffffffff
; CHECK-NEXT: ; BB#1:
-; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #1
; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]]
; CHECK-NEXT: ret
%1 = icmp eq <16 x i8> %a, %a
diff --git a/test/CodeGen/AArch64/fast-isel-gep.ll b/test/CodeGen/AArch64/fast-isel-gep.ll
index 33adcdc3c464..0cb1fd8465d4 100644
--- a/test/CodeGen/AArch64/fast-isel-gep.ll
+++ b/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -33,7 +33,7 @@ define i32* @test_array3(i32* %a) {
define i32* @test_array4(i32* %a) {
; CHECK-LABEL: test_array4
-; CHECK: movz [[REG:x[0-9]+]], #0x1008
+; CHECK: mov [[REG:x[0-9]+]], #4104
; CHECK-NEXR: add x0, x0, [[REG]]
%1 = getelementptr inbounds i32, i32* %a, i64 1026
ret i32* %1
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
index 598826763787..c35ae4230dd4 100644
--- a/test/CodeGen/AArch64/fast-isel-tbz.ll
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -1,5 +1,5 @@
-; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
+; RUN: llc -disable-peephole -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -disable-peephole -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
define i32 @icmp_eq_i8(i8 zeroext %a) {
; CHECK-LABEL: icmp_eq_i8
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
index f021eb232618..fcc852263b48 100644
--- a/test/CodeGen/AArch64/fastcc.ll
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -1,18 +1,21 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim -tailcallopt -aarch64-redzone | FileCheck %s -check-prefix CHECK-TAIL-RZ
; Without tailcallopt fastcc still means the caller cleans up the
; stack, so try to make sure this is respected.
define fastcc void @func_stack0() {
; CHECK-LABEL: func_stack0:
-; CHECK: mov x29, sp
-; CHECK: str w{{[0-9]+}}, [sp, #-32]!
+; CHECK: sub sp, sp, #48
+; CHECK: add x29, sp, #32
+; CHECK: str w{{[0-9]+}}, [sp]
; CHECK-TAIL-LABEL: func_stack0:
-; CHECK-TAIL: stp x29, x30, [sp, #-16]!
-; CHECK-TAIL-NEXT: mov x29, sp
-; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]!
+; CHECK-TAIL: sub sp, sp, #48
+; CHECK-TAIL-NEXT: stp x29, x30, [sp, #32]
+; CHECK-TAIL-NEXT: add x29, sp, #32
+; CHECK-TAIL: str w{{[0-9]+}}, [sp]
call fastcc void @func_stack8([8 x i32] undef, i32 42)
@@ -41,27 +44,29 @@ define fastcc void @func_stack0() {
; CHECK-TAIL-NOT: sub sp, sp
ret void
-; CHECK: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK: ldp x29, x30, [sp, #32]
+; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
-; CHECK-TAIL: mov sp, x29
-; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
+; CHECK-TAIL: ldp x29, x30, [sp, #32]
+; CHECK-TAIL-NEXT: add sp, sp, #48
; CHECK-TAIL-NEXT: ret
}
define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK-LABEL: func_stack8:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK: mov x29, sp
-; CHECK: str w{{[0-9]+}}, [sp, #-32]!
+; CHECK: sub sp, sp, #48
+; CHECK: stp x29, x30, [sp, #32]
+; CHECK: add x29, sp, #32
+; CHECK: str w{{[0-9]+}}, [sp]
; CHECK-TAIL-LABEL: func_stack8:
-; CHECK-TAIL: stp x29, x30, [sp, #-16]!
-; CHECK-TAIL: mov x29, sp
-; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]!
+; CHECK-TAIL: sub sp, sp, #48
+; CHECK-TAIL: stp x29, x30, [sp, #32]
+; CHECK-TAIL: add x29, sp, #32
+; CHECK-TAIL: str w{{[0-9]+}}, [sp]
call fastcc void @func_stack8([8 x i32] undef, i32 42)
@@ -90,22 +95,22 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK-TAIL-NOT: sub sp, sp
ret void
-; CHECK: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ldp x29, x30, [sp, #32]
+; CHECK: add sp, sp, #48
; CHECK-NEXT: ret
-; CHECK-TAIL: mov sp, x29
-; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
+; CHECK-TAIL: ldp x29, x30, [sp, #32]
+; CHECK-TAIL-NEXT: add sp, sp, #64
; CHECK-TAIL-NEXT: ret
}
define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-LABEL: func_stack32:
-; CHECK: mov x29, sp
+; CHECK: add x29, sp, #32
; CHECK-TAIL-LABEL: func_stack32:
-; CHECK-TAIL: mov x29, sp
+; CHECK-TAIL: add x29, sp, #32
call fastcc void @func_stack8([8 x i32] undef, i32 42)
@@ -134,11 +139,99 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-TAIL-NOT: sub sp, sp
ret void
-; CHECK: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK: ldp x29, x30, [sp, #32]
+; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
-; CHECK-TAIL: mov sp, x29
-; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
+; CHECK-TAIL: ldp x29, x30, [sp, #32]
+; CHECK-TAIL-NEXT: add sp, sp, #80
; CHECK-TAIL-NEXT: ret
}
+
+; Check that arg stack pop is done after callee-save restore when no frame pointer is used.
+define fastcc void @func_stack32_leaf([8 x i32], i128 %stacked0, i128 %stacked1) {
+; CHECK-LABEL: func_stack32_leaf:
+; CHECK: str x20, [sp, #-16]!
+; CHECK: nop
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldr x20, [sp], #16
+; CHECK-NEXT: ret
+
+; CHECK-TAIL-LABEL: func_stack32_leaf:
+; CHECK-TAIL: str x20, [sp, #-16]!
+; CHECK-TAIL: nop
+; CHECK-TAIL-NEXT: //NO_APP
+; CHECK-TAIL-NEXT: ldr x20, [sp], #16
+; CHECK-TAIL-NEXT: add sp, sp, #32
+; CHECK-TAIL-NEXT: ret
+
+; CHECK-TAIL-RZ-LABEL: func_stack32_leaf:
+; CHECK-TAIL-RZ: str x20, [sp, #-16]!
+; CHECK-TAIL-RZ-NOT: sub sp, sp
+; CHECK-TAIL-RZ: nop
+; CHECK-TAIL-RZ-NEXT: //NO_APP
+; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16
+; CHECK-TAIL-RZ-NEXT: add sp, sp, #32
+; CHECK-TAIL-RZ-NEXT: ret
+
+ ; Make sure there is a callee-save register to save/restore.
+ call void asm sideeffect "nop", "~{x20}"() nounwind
+ ret void
+}
+
+; Check that arg stack pop is done after callee-save restore when no frame pointer is used.
+define fastcc void @func_stack32_leaf_local([8 x i32], i128 %stacked0, i128 %stacked1) {
+; CHECK-LABEL: func_stack32_leaf_local:
+; CHECK: sub sp, sp, #32
+; CHECK-NEXT: str x20, [sp, #16]
+; CHECK: nop
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldr x20, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+
+; CHECK-TAIL-LABEL: func_stack32_leaf_local:
+; CHECK-TAIL: sub sp, sp, #32
+; CHECK-TAIL-NEXT: str x20, [sp, #16]
+; CHECK-TAIL: nop
+; CHECK-TAIL-NEXT: //NO_APP
+; CHECK-TAIL-NEXT: ldr x20, [sp, #16]
+; CHECK-TAIL-NEXT: add sp, sp, #64
+; CHECK-TAIL-NEXT: ret
+
+; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local:
+; CHECK-TAIL-RZ: str x20, [sp, #-16]!
+; CHECK-TAIL-RZ-NOT: sub sp, sp
+; CHECK-TAIL-RZ: nop
+; CHECK-TAIL-RZ-NEXT: //NO_APP
+; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16
+; CHECK-TAIL-RZ-NEXT: add sp, sp, #32
+; CHECK-TAIL-RZ-NEXT: ret
+
+ %val0 = alloca [2 x i64], align 8
+
+ ; Make sure there is a callee-save register to save/restore.
+ call void asm sideeffect "nop", "~{x20}"() nounwind
+ ret void
+}
+
+; Check that arg stack pop is done after callee-save restore when no frame pointer is used.
+define fastcc void @func_stack32_leaf_local_nocs([8 x i32], i128 %stacked0, i128 %stacked1) {
+; CHECK-LABEL: func_stack32_leaf_local_nocs:
+; CHECK: sub sp, sp, #16
+; CHECK: add sp, sp, #16
+; CHECK-NEXT: ret
+
+; CHECK-TAIL-LABEL: func_stack32_leaf_local_nocs:
+; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: add sp, sp, #48
+; CHECK-TAIL-NEXT: ret
+
+; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local_nocs:
+; CHECK-TAIL-RZ: add sp, sp, #32
+; CHECK-TAIL-RZ-NEXT: ret
+
+ %val0 = alloca [2 x i64], align 8
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll
index d549c7e78421..e52b601b1454 100644
--- a/test/CodeGen/AArch64/fcvt-int.ll
+++ b/test/CodeGen/AArch64/fcvt-int.ll
@@ -149,3 +149,28 @@ define double @test_bitcasti64todouble(i64 %in) {
ret double %res
}
+
+define double @bitcast_fabs(double %x) {
+; CHECK-LABEL: bitcast_fabs:
+; CHECK: ; BB#0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: ret
+;
+ %bc1 = bitcast double %x to i64
+ %and = and i64 %bc1, 9223372036854775807
+ %bc2 = bitcast i64 %and to double
+ ret double %bc2
+}
+
+define float @bitcast_fneg(float %x) {
+; CHECK-LABEL: bitcast_fneg:
+; CHECK: ; BB#0:
+; CHECK-NEXT: fneg s0, s0
+; CHECK-NEXT: ret
+;
+ %bc1 = bitcast float %x to i32
+ %xor = xor i32 %bc1, 2147483648
+ %bc2 = bitcast i32 %xor to float
+ ret float %bc2
+}
+
diff --git a/test/CodeGen/AArch64/fcvt_combine.ll b/test/CodeGen/AArch64/fcvt_combine.ll
index 093ce4a4cd85..5644fa28533b 100644
--- a/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/test/CodeGen/AArch64/fcvt_combine.ll
@@ -152,3 +152,11 @@ define <2 x i32> @test14(<2 x float> %f) {
%vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
ret <2 x i32> %vcvt.i
}
+
+; CHECK-LABEL: test_illegal_fp_to_int:
+; CHECK: fcvtzs.4s v0, v0, #2
+define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
+ %scale = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
+ %val = fptosi <3 x float> %scale to <3 x i32>
+ ret <3 x i32> %val
+}
diff --git a/test/CodeGen/AArch64/fdiv-combine.ll b/test/CodeGen/AArch64/fdiv-combine.ll
index 389eefd97b28..9ec64a854ca3 100644
--- a/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/test/CodeGen/AArch64/fdiv-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
; Following test cases check:
; a / D; b / D; c / D;
@@ -6,8 +6,8 @@
; recip = 1.0 / D; a * recip; b * recip; c * recip;
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-LABEL: three_fdiv_float:
-; CHECK: fdiv
-; CHECK-NEXT-NOT: fdiv
+; CHECK: fdiv s
+; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
@@ -20,8 +20,8 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
-; CHECK: fdiv
-; CHECK-NEXT-NOT: fdiv
+; CHECK: fdiv d
+; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
@@ -34,8 +34,8 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: three_fdiv_4xfloat:
-; CHECK: fdiv
-; CHECK-NEXT-NOT: fdiv
+; CHECK: fdiv v
+; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
@@ -48,8 +48,8 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; CHECK-LABEL: three_fdiv_2xdouble:
-; CHECK: fdiv
-; CHECK-NEXT-NOT: fdiv
+; CHECK: fdiv v
+; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
@@ -64,9 +64,9 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double>
; calculates a reciprocal.
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
; CHECK-LABEL: two_fdiv_float:
-; CHECK: fdiv
-; CHECK: fdiv
-; CHECK-NEXT-NOT: fmul
+; CHECK: fdiv s
+; CHECK: fdiv s
+; CHECK-NOT: fmul
%div = fdiv float %a, %D
%div1 = fdiv float %b, %D
tail call void @foo_2f(float %div, float %div1)
@@ -75,9 +75,9 @@ define void @two_fdiv_float(float %D, float %a, float %b) #0 {
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
-; CHECK: fdiv
-; CHECK: fdiv
-; CHECK-NEXT-NOT: fmul
+; CHECK: fdiv d
+; CHECK: fdiv d
+; CHECK-NOT: fmul
%div = fdiv double %a, %D
%div1 = fdiv double %b, %D
tail call void @foo_2d(double %div, double %div1)
diff --git a/test/CodeGen/AArch64/fdiv_combine.ll b/test/CodeGen/AArch64/fdiv_combine.ll
index 6f38a267ec3f..8ebee3c68287 100644
--- a/test/CodeGen/AArch64/fdiv_combine.ll
+++ b/test/CodeGen/AArch64/fdiv_combine.ll
@@ -38,7 +38,7 @@ entry:
; Test which should not fold due to power of 2 out of range.
; CHECK-LABEL: @test4
; CHECK: scvtf.2s v0, v0
-; CHECK: movi.2s v1, #0x50, lsl #24
+; CHECK: movi.2s v1, #80, lsl #24
; CHECK: fdiv.2s v0, v0, v1
; CHECK: ret
define <2 x float> @test4(<2 x i32> %in) {
@@ -96,7 +96,7 @@ define <4 x float> @test8(<4 x i16> %in) {
; CHECK-LABEL: @test9
; CHECK: ucvtf.2d v0, v0
; CHECK: fcvtn v0.2s, v0.2d
-; CHECK: movi.2s v1, #0x40, lsl #24
+; CHECK: movi.2s v1, #64, lsl #24
; CHECK: fdiv.2s v0, v0, v1
; CHECK: ret
define <2 x float> @test9(<2 x i64> %in) {
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
index b4f4d77cd0bc..4d9cb21ddc3d 100644
--- a/test/CodeGen/AArch64/fp-cond-sel.ll
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s
@varfloat = global float 0.0
@vardouble = global double 0.0
@@ -12,8 +12,8 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
%tst1 = icmp ugt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float 0.0, float 1.0
store float %val1, float* @varfloat
-; CHECK: movi v[[FLT0:[0-9]+]].2d, #0
-; CHECK: fmov s[[FLT1:[0-9]+]], #1.0
+; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0
; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
%rhs64 = sext i32 %rhs32 to i64
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index b892f1902b03..b39ff08db39a 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -15,7 +15,7 @@ entry:
define <4 x half> @build_h4(<4 x half> %a) {
entry:
; CHECK-LABEL: build_h4:
-; CHECK: movz [[GPR:w[0-9]+]], #0x3ccd
+; CHECK: mov [[GPR:w[0-9]+]], #15565
; CHECK: dup v0.4h, [[GPR]]
ret <4 x half> <half 0xH3CCD, half 0xH3CCD, half 0xH3CCD, half 0xH3CCD>
}
@@ -176,7 +176,7 @@ define <4 x half> @sitofp_i64(<4 x i64> %a) #0 {
define <4 x half> @uitofp_i8(<4 x i8> %a) #0 {
; CHECK-LABEL: uitofp_i8:
-; CHECK-NEXT: bic v0.4h, #0xff, lsl #8
+; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
@@ -277,10 +277,10 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, ne
-; CHECK-DAG: csel {{.*}}, wzr, ne
-; CHECK-DAG: csel {{.*}}, wzr, ne
-; CHECK-DAG: csel {{.*}}, wzr, ne
+; CHECK-DAG: csetm {{.*}}, ne
+; CHECK-DAG: csetm {{.*}}, ne
+; CHECK-DAG: csetm {{.*}}, ne
+; CHECK-DAG: csetm {{.*}}, ne
define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp une <4 x half> %a, %b
ret <4 x i1> %1
@@ -296,14 +296,14 @@ define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, vs
-; CHECK-DAG: csel {{.*}}, vs
-; CHECK-DAG: csel {{.*}}, vs
-; CHECK-DAG: csel {{.*}}, vs
+; CHECK-DAG: csetm [[REG1:w[0-9]+]], eq
+; CHECK-DAG: csetm [[REG2:w[0-9]+]], eq
+; CHECK-DAG: csetm [[REG3:w[0-9]+]], eq
+; CHECK-DAG: csetm [[REG4:w[0-9]+]], eq
+; CHECK-DAG: csinv {{.*}}, [[REG1]], wzr, vc
+; CHECK-DAG: csinv {{.*}}, [[REG2]], wzr, vc
+; CHECK-DAG: csinv {{.*}}, [[REG3]], wzr, vc
+; CHECK-DAG: csinv {{.*}}, [[REG4]], wzr, vc
define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ueq <4 x half> %a, %b
ret <4 x i1> %1
@@ -319,10 +319,10 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, hi
-; CHECK-DAG: csel {{.*}}, wzr, hi
-; CHECK-DAG: csel {{.*}}, wzr, hi
-; CHECK-DAG: csel {{.*}}, wzr, hi
+; CHECK-DAG: csetm {{.*}}, hi
+; CHECK-DAG: csetm {{.*}}, hi
+; CHECK-DAG: csetm {{.*}}, hi
+; CHECK-DAG: csetm {{.*}}, hi
define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ugt <4 x half> %a, %b
ret <4 x i1> %1
@@ -338,10 +338,10 @@ define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, pl
-; CHECK-DAG: csel {{.*}}, wzr, pl
-; CHECK-DAG: csel {{.*}}, wzr, pl
-; CHECK-DAG: csel {{.*}}, wzr, pl
+; CHECK-DAG: csetm {{.*}}, pl
+; CHECK-DAG: csetm {{.*}}, pl
+; CHECK-DAG: csetm {{.*}}, pl
+; CHECK-DAG: csetm {{.*}}, pl
define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp uge <4 x half> %a, %b
ret <4 x i1> %1
@@ -357,10 +357,10 @@ define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, lt
-; CHECK-DAG: csel {{.*}}, wzr, lt
-; CHECK-DAG: csel {{.*}}, wzr, lt
-; CHECK-DAG: csel {{.*}}, wzr, lt
+; CHECK-DAG: csetm {{.*}}, lt
+; CHECK-DAG: csetm {{.*}}, lt
+; CHECK-DAG: csetm {{.*}}, lt
+; CHECK-DAG: csetm {{.*}}, lt
define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ult <4 x half> %a, %b
ret <4 x i1> %1
@@ -376,10 +376,10 @@ define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, le
-; CHECK-DAG: csel {{.*}}, wzr, le
-; CHECK-DAG: csel {{.*}}, wzr, le
-; CHECK-DAG: csel {{.*}}, wzr, le
+; CHECK-DAG: csetm {{.*}}, le
+; CHECK-DAG: csetm {{.*}}, le
+; CHECK-DAG: csetm {{.*}}, le
+; CHECK-DAG: csetm {{.*}}, le
define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ule <4 x half> %a, %b
ret <4 x i1> %1
@@ -395,10 +395,10 @@ define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, vs
-; CHECK-DAG: csel {{.*}}, wzr, vs
-; CHECK-DAG: csel {{.*}}, wzr, vs
-; CHECK-DAG: csel {{.*}}, wzr, vs
+; CHECK-DAG: csetm {{.*}}, vs
+; CHECK-DAG: csetm {{.*}}, vs
+; CHECK-DAG: csetm {{.*}}, vs
+; CHECK-DAG: csetm {{.*}}, vs
define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp uno <4 x half> %a, %b
ret <4 x i1> %1
@@ -414,14 +414,15 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, gt
-; CHECK-DAG: csel {{.*}}, gt
-; CHECK-DAG: csel {{.*}}, gt
-; CHECK-DAG: csel {{.*}}, gt
+; CHECK-DAG: csetm [[REG1:w[0-9]+]], mi
+; CHECK-DAG: csetm [[REG2:w[0-9]+]], mi
+; CHECK-DAG: csetm [[REG3:w[0-9]+]], mi
+; CHECK-DAG: csetm [[REG4:w[0-9]+]], mi
+; CHECK-DAG: csinv {{.*}}, [[REG1]], wzr, le
+; CHECK-DAG: csinv {{.*}}, [[REG2]], wzr, le
+; CHECK-DAG: csinv {{.*}}, [[REG3]], wzr, le
+; CHECK-DAG: csinv {{.*}}, [[REG4]], wzr, le
+
define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp one <4 x half> %a, %b
ret <4 x i1> %1
@@ -437,10 +438,10 @@ define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, wzr, eq
-; CHECK-DAG: csel {{.*}}, wzr, eq
+; CHECK-DAG: csetm {{.*}}, eq
+; CHECK-DAG: csetm {{.*}}, eq
+; CHECK-DAG: csetm {{.*}}, eq
+; CHECK-DAG: csetm {{.*}}, eq
define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp oeq <4 x half> %a, %b
ret <4 x i1> %1
@@ -456,10 +457,10 @@ define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, gt
-; CHECK-DAG: csel {{.*}}, wzr, gt
-; CHECK-DAG: csel {{.*}}, wzr, gt
-; CHECK-DAG: csel {{.*}}, wzr, gt
+; CHECK-DAG: csetm {{.*}}, gt
+; CHECK-DAG: csetm {{.*}}, gt
+; CHECK-DAG: csetm {{.*}}, gt
+; CHECK-DAG: csetm {{.*}}, gt
define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ogt <4 x half> %a, %b
ret <4 x i1> %1
@@ -475,10 +476,10 @@ define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, ge
-; CHECK-DAG: csel {{.*}}, wzr, ge
-; CHECK-DAG: csel {{.*}}, wzr, ge
-; CHECK-DAG: csel {{.*}}, wzr, ge
+; CHECK-DAG: csetm {{.*}}, ge
+; CHECK-DAG: csetm {{.*}}, ge
+; CHECK-DAG: csetm {{.*}}, ge
+; CHECK-DAG: csetm {{.*}}, ge
define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp oge <4 x half> %a, %b
ret <4 x i1> %1
@@ -494,10 +495,10 @@ define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, wzr, mi
-; CHECK-DAG: csel {{.*}}, wzr, mi
+; CHECK-DAG: csetm {{.*}}, mi
+; CHECK-DAG: csetm {{.*}}, mi
+; CHECK-DAG: csetm {{.*}}, mi
+; CHECK-DAG: csetm {{.*}}, mi
define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp olt <4 x half> %a, %b
ret <4 x i1> %1
@@ -513,10 +514,10 @@ define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, ls
-; CHECK-DAG: csel {{.*}}, wzr, ls
-; CHECK-DAG: csel {{.*}}, wzr, ls
-; CHECK-DAG: csel {{.*}}, wzr, ls
+; CHECK-DAG: csetm {{.*}}, ls
+; CHECK-DAG: csetm {{.*}}, ls
+; CHECK-DAG: csetm {{.*}}, ls
+; CHECK-DAG: csetm {{.*}}, ls
define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ole <4 x half> %a, %b
ret <4 x i1> %1
@@ -532,10 +533,10 @@ define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
-; CHECK-DAG: csel {{.*}}, wzr, vc
-; CHECK-DAG: csel {{.*}}, wzr, vc
-; CHECK-DAG: csel {{.*}}, wzr, vc
-; CHECK-DAG: csel {{.*}}, wzr, vc
+; CHECK-DAG: csetm {{.*}}, vc
+; CHECK-DAG: csetm {{.*}}, vc
+; CHECK-DAG: csetm {{.*}}, vc
+; CHECK-DAG: csetm {{.*}}, vc
define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 {
%1 = fcmp ord <4 x half> %a, %b
ret <4 x i1> %1
diff --git a/test/CodeGen/AArch64/fp16-vector-nvcast.ll b/test/CodeGen/AArch64/fp16-vector-nvcast.ll
index 83e0df74c3da..018c88c5f3eb 100644
--- a/test/CodeGen/AArch64/fp16-vector-nvcast.ll
+++ b/test/CodeGen/AArch64/fp16-vector-nvcast.ll
@@ -3,7 +3,7 @@
; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
define void @nvcast_v2i32(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v2i32:
-; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #171, lsl #16
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
@@ -14,7 +14,7 @@ define void @nvcast_v2i32(<4 x half>* %a) #0 {
; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
define void @nvcast_v4i16(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v4i16:
-; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #171
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
@@ -25,7 +25,7 @@ define void @nvcast_v4i16(<4 x half>* %a) #0 {
; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
define void @nvcast_v8i8(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v8i8:
-; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #171
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
@@ -46,7 +46,7 @@ define void @nvcast_f64(<4 x half>* %a) #0 {
; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
define void @nvcast_v4i32(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v4i32:
-; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #171, lsl #16
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
@@ -57,7 +57,7 @@ define void @nvcast_v4i32(<8 x half>* %a) #0 {
; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
define void @nvcast_v8i16(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v8i16:
-; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #171
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
@@ -68,7 +68,7 @@ define void @nvcast_v8i16(<8 x half>* %a) #0 {
; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
define void @nvcast_v16i8(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v16i8:
-; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
+; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #171
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index 6acb11108afc..b4faef750a2c 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -38,20 +38,19 @@ define void @check_double() {
}
; LARGE-LABEL: check_float2
-; LARGE: movz [[REG:w[0-9]+]], #0x4049, lsl #16
-; LARGE-NEXT: movk [[REG]], #0xfdb
+; LARGE: mov [[REG:w[0-9]+]], #1078525952
+; LARGE-NEXT: movk [[REG]], #4059
; LARGE-NEXT: fmov s0, [[REG]]
define float @check_float2() {
ret float 3.14159274101257324218750
}
; LARGE-LABEL: check_double2
-; LARGE: movz [[REG:x[0-9]+]], #0x4009, lsl #48
-; LARGE-NEXT: movk [[REG]], #0x21fb, lsl #32
-; LARGE-NEXT: movk [[REG]], #0x5444, lsl #16
-; LARGE-NEXT: movk [[REG]], #0x2d18
+; LARGE: mov [[REG:x[0-9]+]], #4614219293217783808
+; LARGE-NEXT: movk [[REG]], #8699, lsl #32
+; LARGE-NEXT: movk [[REG]], #21572, lsl #16
+; LARGE-NEXT: movk [[REG]], #11544
; LARGE-NEXT: fmov d0, [[REG]]
define double @check_double2() {
ret double 3.1415926535897931159979634685441851615905761718750
}
-
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index 2ea13e388867..cf6545dab385 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-post-ra | FileCheck --check-prefix=CHECK %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-post-ra | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -disable-post-ra | FileCheck --check-prefix=CHECK-NOFP %s
%myStruct = type { i64 , i8, i32 }
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index 2f45666ba13a..40ed607b06cc 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK-NONEON %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s
@@ -89,11 +89,11 @@ define void @check_stack_args() {
; that varstruct is passed on the stack. Rather dependent on how a
; memcpy gets created, but the following works for now.
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #-16]
+; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b
-; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp, #-16]!
+; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp]
; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]]
@@ -104,10 +104,10 @@ define void @check_stack_args() {
float -2.0, float -8.0, float 16.0, float 1.0,
float 64.0)
-; CHECK: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
+; CHECK: mov [[SIXTY_FOUR:w[0-9]+]], #1115684864
; CHECK: str [[SIXTY_FOUR]], [sp]
-; CHECK-NONEON: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
+; CHECK-NONEON: mov [[SIXTY_FOUR:w[0-9]+]], #1115684864
; CHECK-NONEON: str [[SIXTY_FOUR]], [sp]
; CHECK: bl stacked_fpu
@@ -139,9 +139,9 @@ define void @check_i128_align() {
call void @check_i128_regalign(i32 0, i128 42)
; CHECK-NOT: mov x1
-; CHECK-LE: movz x2, #{{0x2a|42}}
+; CHECK-LE: mov x2, #{{0x2a|42}}
; CHECK-LE: mov x3, xzr
-; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}}
+; CHECK-BE: mov {{x|w}}3, #{{0x2a|42}}
; CHECK-BE: mov x2, xzr
; CHECK: bl check_i128_regalign
diff --git a/test/CodeGen/AArch64/gep-nullptr.ll b/test/CodeGen/AArch64/gep-nullptr.ll
new file mode 100644
index 000000000000..4c2bc504cd04
--- /dev/null
+++ b/test/CodeGen/AArch64/gep-nullptr.ll
@@ -0,0 +1,23 @@
+; RUN: llc -O3 -aarch64-gep-opt=true < %s |FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%structA = type { i8, i8, i8, i8, i8, i8, [4 x i8], i8, i8, [2 x i32], [2 x %unionMV], [4 x [2 x %unionMV]], [4 x [2 x %unionMV]], [4 x i8], i8*, i8*, i32, i8* }
+%unionMV = type { i32 }
+
+; Function Attrs: nounwind
+define void @test(%structA* %mi_block) {
+entry:
+ br i1 undef, label %for.body13.us, label %if.else
+
+; Just make sure we don't get a compiler ICE due to dereferncing a nullptr.
+; CHECK-LABEL: test
+for.body13.us: ; preds = %entry
+ %indvars.iv.next40 = or i64 0, 1
+ %packed4.i.us.1 = getelementptr inbounds %structA, %structA* %mi_block, i64 0, i32 11, i64 0, i64 %indvars.iv.next40, i32 0
+ unreachable
+
+if.else: ; preds = %entry
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
index 6895380ca63e..481be4017b00 100644
--- a/test/CodeGen/AArch64/global-merge-3.ll
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -21,7 +21,7 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
}
;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals
-;CHECK: .align 4
+;CHECK: .p2align 4
;CHECK: .L_MergedGlobals:
;CHECK: .size .L_MergedGlobals, 4004
@@ -29,7 +29,7 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
;CHECK: .local .L_MergedGlobals.1
;CHECK: .comm .L_MergedGlobals.1,4000,16
-;CHECK-APPLE-IOS: .align 4
+;CHECK-APPLE-IOS: .p2align 4
;CHECK-APPLE-IOS: l__MergedGlobals:
;CHECK-APPLE-IOS: .long 1
;CHECK-APPLE-IOS: .space 4000
diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll
index 8b3fc97c9e2e..434c787b28da 100644
--- a/test/CodeGen/AArch64/global-merge-group-by-use.ll
+++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll
@@ -64,8 +64,8 @@ define void @f3(i32 %a1, i32 %a2) #0 {
define void @f4(i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-NEXT: adrp x8, [[SET3]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET3]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #4]
-; CHECK-NEXT: str w2, [x8]
+; CHECK-NEXT: stp w2, w0, [x8]
+; CHECK-NEXT: str w1, [x8, #8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m4, align 4
store i32 %a2, i32* @n4, align 4
diff --git a/test/CodeGen/AArch64/half.ll b/test/CodeGen/AArch64/half.ll
index d4cbbc918a84..154d85c9bb61 100644
--- a/test/CodeGen/AArch64/half.ll
+++ b/test/CodeGen/AArch64/half.ll
@@ -81,3 +81,15 @@ define void @test_trunc64(double %in, half* %addr) {
store half %val16, half* %addr
ret void
}
+
+define i16 @test_fccmp(i1 %a) {
+;CHECK-LABEL: test_fccmp:
+;CHECK: fcmp
+ %cmp0 = fcmp ogt half 0xH3333, undef
+ %cmp1 = fcmp ogt half 0xH2222, undef
+ %x = select i1 %cmp0, i16 0, i16 undef
+ %or = or i1 %cmp1, %cmp0
+ %y = select i1 %or, i16 4, i16 undef
+ %r = add i16 %x, %y
+ ret i16 %r
+}
diff --git a/test/CodeGen/AArch64/hints.ll b/test/CodeGen/AArch64/hints.ll
index d7d9e23af1f1..f23c7b00f224 100644
--- a/test/CodeGen/AArch64/hints.ll
+++ b/test/CodeGen/AArch64/hints.ll
@@ -63,5 +63,5 @@ entry:
}
; CHECK-LABEL: hint_undefined
-; CHECK: hint #0x8
+; CHECK: hint #8
diff --git a/test/CodeGen/AArch64/inlineasm-X-allocation.ll b/test/CodeGen/AArch64/inlineasm-X-allocation.ll
new file mode 100644
index 000000000000..1d7a24e3e6e7
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-X-allocation.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=-fp-armv8 %s -o - | FileCheck %s -check-prefix=nofp
+
+; In the novfp case, the compiler is forced to assign a core register,
+; even if the input is a float.
+
+; nofp-LABEL: f1
+; nofp-CHECK: ldr x0, [sp]
+
+; This can be generated by a function such as:
+; void f1(float f) {asm volatile ("ldr $0, [sp]" : : "X" (f));}
+
+define void @f1(float %f) {
+entry:
+ call void asm sideeffect "ldr $0, [sp]", "X" (float %f) nounwind
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/inlineasm-X-constraint.ll b/test/CodeGen/AArch64/inlineasm-X-constraint.ll
new file mode 100644
index 000000000000..77652cc071ef
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-X-constraint.ll
@@ -0,0 +1,152 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s
+
+; The following functions test the use case where an X constraint is used to
+; add a dependency between an assembly instruction (vmsr in this case) and
+; another instruction. In each function, we use a different type for the
+; X constraint argument.
+;
+; We can something similar from the following C code:
+; double f1(double f, int pscr_value) {
+; asm volatile("msr fpsr,%1" : "=X" ((f)): "r" (pscr_value));
+; return f+f;
+; }
+
+; CHECK-LABEL: f1
+; CHECK: msr FPSR
+; CHECK: fadd d
+
+define double @f1(double %f, i32 %pscr_value) {
+entry:
+ %f.addr = alloca double, align 8
+ store double %f, double* %f.addr, align 8
+ call void asm sideeffect "msr fpsr,$1", "=*X,r"(double* nonnull %f.addr, i32 %pscr_value) nounwind
+ %0 = load double, double* %f.addr, align 8
+ %add = fadd double %0, %0
+ ret double %add
+}
+
+; int f2(int f, int pscr_value) {
+; asm volatile("msr fpsr,$1" : "=X" ((f)): "r" (pscr_value));
+; return f*f;
+; }
+
+; CHECK-LABEL: f2
+; CHECK: msr FPSR
+; CHECK: mul
+define i32 @f2(i32 %f, i32 %pscr_value) {
+entry:
+ %f.addr = alloca i32, align 4
+ store i32 %f, i32* %f.addr, align 4
+ call void asm sideeffect "msr fpsr,$1", "=*X,r"(i32* nonnull %f.addr, i32 %pscr_value) nounwind
+ %0 = load i32, i32* %f.addr, align 4
+ %mul = mul i32 %0, %0
+ ret i32 %mul
+}
+
+; typedef signed char int8_t;
+; typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+; void f3 (void)
+; {
+; int8x8_t vector_res_int8x8;
+; unsigned int fpscr;
+; asm volatile ("msr fpsr,$1" : "=X" ((vector_res_int8x8)) : "r" (fpscr));
+; return vector_res_int8x8 * vector_res_int8x8;
+; }
+
+; CHECK-LABEL: f3
+; CHECK: msr FPSR
+; CHECK: mul
+define <8 x i8> @f3() {
+entry:
+ %vector_res_int8x8 = alloca <8 x i8>, align 8
+ %0 = getelementptr inbounds <8 x i8>, <8 x i8>* %vector_res_int8x8, i32 0, i32 0
+ call void asm sideeffect "msr fpsr,$1", "=*X,r"(<8 x i8>* nonnull %vector_res_int8x8, i32 undef) nounwind
+ %1 = load <8 x i8>, <8 x i8>* %vector_res_int8x8, align 8
+ %mul = mul <8 x i8> %1, %1
+ ret <8 x i8> %mul
+}
+
+; We can emit integer constants.
+; We can get this from:
+; void f() {
+; int x = 2;
+; asm volatile ("add x0, x0, %0" : : "X" (x));
+; }
+;
+; CHECK-LABEL: f4
+; CHECK: add x0, x0, #2
+define void @f4() {
+entry:
+ tail call void asm sideeffect "add x0, x0, $0", "X"(i32 2)
+ ret void
+}
+
+; We can emit function labels. This is equivalent to the following C code:
+; void f(void) {
+; void (*x)(void) = &foo;
+; asm volatile ("bl %0" : : "X" (x));
+; }
+; CHECK-LABEL: f5
+; CHECK: bl f4
+define void @f5() {
+entry:
+ tail call void asm sideeffect "bl $0", "X"(void ()* nonnull @f4)
+ ret void
+}
+
+declare void @foo(...)
+
+; This tests the behavior of the X constraint when used on functions pointers,
+; or functions with a cast. In the first asm call we figure out that this
+; is a function pointer and emit the label. However, in the second asm call
+; we can't see through the bitcast and we end up having to lower this constraint
+; to something else. This is not ideal, but it is a correct behaviour according
+; to the definition of the X constraint.
+;
+; In this case (and other cases where we could have emitted something else),
+; what we're doing with the X constraint is not particularly useful either,
+; since the user could have used "r" in this situation for the same effect.
+
+; CHECK-LABEL: f6
+; CHECK: bl foo
+; CHECK: br x
+
+define void @f6() nounwind {
+entry:
+ tail call void asm sideeffect "bl $0", "X"(void (...)* @foo) nounwind
+ tail call void asm sideeffect "br $0", "X"(void (...)* bitcast (void ()* @f4 to void (...)*)) nounwind
+ ret void
+}
+
+; The following IR can be generated from C code with a function like:
+; void a() {
+; void* a = &&A;
+; asm volatile ("bl %0" : : "X" (a));
+; A:
+; return;
+; }
+;
+; Ideally this would give the block address of bb, but it requires us to see
+; through blockaddress, which we can't do at the moment. This might break some
+; existing use cases where a user would expect to get a block label and instead
+; gets the block address in a register. However, note that according to the
+; "no constraints" definition this behaviour is correct (although not very nice).
+
+; CHECK-LABEL: f7
+; CHECK: bl
+define void @f7() {
+ call void asm sideeffect "br $0", "X"( i8* blockaddress(@f7, %bb) )
+ br label %bb
+bb:
+ ret void
+}
+
+; If we use a constraint "=*X", we should get a store back to *%x (in x0).
+; CHECK-LABEL: f8
+; CHECK: add [[Dest:x[0-9]+]], x0, x0
+; CHECK: str [[Dest]], [x0]
+define void @f8(i64 *%x) {
+entry:
+ tail call void asm sideeffect "add $0, x0, x0", "=*X"(i64 *%x)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
index 645214ac8ec7..ca24fc9c8807 100644
--- a/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
+++ b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=aarch64 < %s -filetype=obj | llvm-objdump -arch=aarch64 -d - | FileCheck %s
; CHECK-LABEL: foo:
-; CHECK: a0 79 95 d2 movz x0, #0xabcd
+; CHECK: a0 79 95 d2 mov x0, #43981
; CHECK: c0 03 5f d6 ret
define i32 @foo() nounwind {
entry:
@@ -22,5 +22,3 @@ entry:
%0 = tail call i32 asm sideeffect "ldr $0,=0x10001", "=r"() nounwind
ret i32 %0
}
-
-
diff --git a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
new file mode 100644
index 000000000000..f65694ab80a1
--- /dev/null
+++ b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -march=aarch64 -aarch64-neon-syntax=apple -aarch64-stp-suppress=false -verify-machineinstrs -asm-verbose=false | FileCheck %s
+
+; CHECK-LABEL: test_strd_sturd:
+; CHECK-NEXT: stp d0, d1, [x0, #-8]
+; CHECK-NEXT: ret
+define void @test_strd_sturd(float* %ptr, <2 x float> %v1, <2 x float> %v2) #0 {
+ %tmp1 = bitcast float* %ptr to <2 x float>*
+ store <2 x float> %v2, <2 x float>* %tmp1, align 16
+ %add.ptr = getelementptr inbounds float, float* %ptr, i64 -2
+ %tmp = bitcast float* %add.ptr to <2 x float>*
+ store <2 x float> %v1, <2 x float>* %tmp, align 16
+ ret void
+}
+
+; CHECK-LABEL: test_sturd_strd:
+; CHECK-NEXT: stp d0, d1, [x0, #-8]
+; CHECK-NEXT: ret
+define void @test_sturd_strd(float* %ptr, <2 x float> %v1, <2 x float> %v2) #0 {
+ %add.ptr = getelementptr inbounds float, float* %ptr, i64 -2
+ %tmp = bitcast float* %add.ptr to <2 x float>*
+ store <2 x float> %v1, <2 x float>* %tmp, align 16
+ %tmp1 = bitcast float* %ptr to <2 x float>*
+ store <2 x float> %v2, <2 x float>* %tmp1, align 16
+ ret void
+}
+
+; CHECK-LABEL: test_strq_sturq:
+; CHECK-NEXT: stp q0, q1, [x0, #-16]
+; CHECK-NEXT: ret
+define void @test_strq_sturq(double* %ptr, <2 x double> %v1, <2 x double> %v2) #0 {
+ %tmp1 = bitcast double* %ptr to <2 x double>*
+ store <2 x double> %v2, <2 x double>* %tmp1, align 16
+ %add.ptr = getelementptr inbounds double, double* %ptr, i64 -2
+ %tmp = bitcast double* %add.ptr to <2 x double>*
+ store <2 x double> %v1, <2 x double>* %tmp, align 16
+ ret void
+}
+
+; CHECK-LABEL: test_sturq_strq:
+; CHECK-NEXT: stp q0, q1, [x0, #-16]
+; CHECK-NEXT: ret
+define void @test_sturq_strq(double* %ptr, <2 x double> %v1, <2 x double> %v2) #0 {
+ %add.ptr = getelementptr inbounds double, double* %ptr, i64 -2
+ %tmp = bitcast double* %add.ptr to <2 x double>*
+ store <2 x double> %v1, <2 x double>* %tmp, align 16
+ %tmp1 = bitcast double* %ptr to <2 x double>*
+ store <2 x double> %v2, <2 x double>* %tmp1, align 16
+ ret void
+}
+
+; CHECK-LABEL: test_ldrx_ldurx:
+; CHECK-NEXT: ldp [[V0:x[0-9]+]], [[V1:x[0-9]+]], [x0, #-8]
+; CHECK-NEXT: add x0, [[V0]], [[V1]]
+; CHECK-NEXT: ret
+define i64 @test_ldrx_ldurx(i64* %p) #0 {
+ %tmp = load i64, i64* %p, align 4
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 -1
+ %tmp1 = load i64, i64* %add.ptr, align 4
+ %add = add nsw i64 %tmp1, %tmp
+ ret i64 %add
+}
+
+; CHECK-LABEL: test_ldurx_ldrx:
+; CHECK-NEXT: ldp [[V0:x[0-9]+]], [[V1:x[0-9]+]], [x0, #-8]
+; CHECK-NEXT: add x0, [[V0]], [[V1]]
+; CHECK-NEXT: ret
+define i64 @test_ldurx_ldrx(i64* %p) #0 {
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 -1
+ %tmp1 = load i64, i64* %add.ptr, align 4
+ %tmp = load i64, i64* %p, align 4
+ %add = add nsw i64 %tmp1, %tmp
+ ret i64 %add
+}
+
+; CHECK-LABEL: test_ldrsw_ldursw:
+; CHECK-NEXT: ldpsw [[V0:x[0-9]+]], [[V1:x[0-9]+]], [x0, #-4]
+; CHECK-NEXT: add x0, [[V0]], [[V1]]
+; CHECK-NEXT: ret
+define i64 @test_ldrsw_ldursw(i32* %p) #0 {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 -1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+; Also make sure we only match valid offsets.
+; CHECK-LABEL: test_ldrq_ldruq_invalidoffset:
+; CHECK-NEXT: ldr q[[V0:[0-9]+]], [x0]
+; CHECK-NEXT: ldur q[[V1:[0-9]+]], [x0, #24]
+; CHECK-NEXT: add.2d v0, v[[V0]], v[[V1]]
+; CHECK-NEXT: ret
+define <2 x i64> @test_ldrq_ldruq_invalidoffset(i64* %p) #0 {
+ %a1 = bitcast i64* %p to <2 x i64>*
+ %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8
+ %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 3
+ %a2 = bitcast i64* %add.ptr2 to <2 x i64>*
+ %tmp2 = load <2 x i64>, <2 x i64>* %a2, align 8
+ %add = add nsw <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %add
+}
+
+; Pair an unscaled store with a scaled store where the scaled store has a
+; non-zero offset. This should not hit an assert.
+; CHECK-LABEL: test_stur_str_no_assert
+; CHECK: stp xzr, xzr, [sp, #16]
+; CHECK: ret
+define void @test_stur_str_no_assert() #0 {
+entry:
+ %a1 = alloca i64, align 4
+ %a2 = alloca [12 x i8], align 4
+ %0 = bitcast i64* %a1 to i8*
+ %C = getelementptr inbounds [12 x i8], [12 x i8]* %a2, i64 0, i64 4
+ %1 = bitcast i8* %C to i64*
+ store i64 0, i64* %1, align 4
+ call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 8, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index f4626c7e0a43..f4f77c5aa312 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -1,7 +1,5 @@
import re
-config.suffixes = ['.ll']
-
if not 'AArch64' in config.root.targets:
config.unsupported = True
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
index 332d660eef36..6e33ab2d0beb 100644
--- a/test/CodeGen/AArch64/local_vars.ll
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -24,24 +24,25 @@ define void @trivial_func() nounwind {
}
define void @trivial_fp_func() {
-; CHECK-WITHFP-AARCH64-LABEL: trivial_fp_func:
-; CHECK-WITHFP-AARCH64: sub sp, sp, #16
-; CHECK-WITHFP-AARCH64: stp x29, x30, [sp]
-; CHECK-WITHFP-AARCH64-NEXT: mov x29, sp
+; CHECK-LABEL: trivial_fp_func:
+; CHECK: str x30, [sp, #-16]!
+; CHECK-NOT: mov x29, sp
; CHECK-WITHFP-ARM64-LABEL: trivial_fp_func:
; CHECK-WITHFP-ARM64: stp x29, x30, [sp, #-16]!
; CHECK-WITHFP-ARM64-NEXT: mov x29, sp
; Dont't really care, but it would be a Bad Thing if this came after the epilogue.
+; CHECK-WITHFP-ARM64: bl foo
; CHECK: bl foo
call void @foo()
ret void
-; CHECK-WITHFP: ldp x29, x30, [sp]
-; CHECK-WITHFP: add sp, sp, #16
+; CHECK: ldr x30, [sp], #16
+; CHECK-NEXT: ret
-; CHECK-WITHFP: ret
+; CHECK-WITHFP-ARM64: ldp x29, x30, [sp], #16
+; CHECK-WITHFP-ARM64-NEXT: ret
}
define void @stack_local() {
diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll
index a5e4a9956de7..6f562230d937 100644
--- a/test/CodeGen/AArch64/logical-imm.ll
+++ b/test/CodeGen/AArch64/logical-imm.ll
@@ -73,11 +73,11 @@ define void @test_mov(i32 %in32, i64 %in64) {
; CHECK-LABEL: test_mov:
%val0 = add i32 %in32, 2863311530
store i32 %val0, i32* @var32
-; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa
+; CHECK: mov {{w[0-9]+}}, #-1431655766
%val1 = add i64 %in64, 11068046444225730969
store i64 %val1, i64* @var64
-; CHECK: orr {{x[0-9]+}}, xzr, #0x9999999999999999
+; CHECK: mov {{x[0-9]+}}, #-7378697629483820647
ret void
; CHECK: ret
diff --git a/test/CodeGen/AArch64/lower-range-metadata-func-call.ll b/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
new file mode 100644
index 000000000000..fd4b2f5ba305
--- /dev/null
+++ b/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; and can be eliminated
+; CHECK-LABEL: {{^}}test_call_known_max_range:
+; CHECK: bl foo
+; CHECK-NOT: and
+; CHECK: ret
+define i32 @test_call_known_max_range() #0 {
+entry:
+ %id = tail call i32 @foo(), !range !0
+ %and = and i32 %id, 1023
+ ret i32 %and
+}
+
+; CHECK-LABEL: {{^}}test_call_known_trunc_1_bit_range:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0x1ff
+; CHECK: ret
+define i32 @test_call_known_trunc_1_bit_range() #0 {
+entry:
+ %id = tail call i32 @foo(), !range !0
+ %and = and i32 %id, 511
+ ret i32 %and
+}
+
+; CHECK-LABEL: {{^}}test_call_known_max_range_m1:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0xff
+; CHECK: ret
+define i32 @test_call_known_max_range_m1() #0 {
+entry:
+ %id = tail call i32 @foo(), !range !1
+ %and = and i32 %id, 255
+ ret i32 %and
+}
+
+
+declare i32 @foo()
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = !{i32 0, i32 1024}
+!1 = !{i32 0, i32 1023}
diff --git a/test/CodeGen/AArch64/machine-combiner.ll b/test/CodeGen/AArch64/machine-combiner.ll
index 56a742fd6c3a..0bd416ad1721 100644
--- a/test/CodeGen/AArch64/machine-combiner.ll
+++ b/test/CodeGen/AArch64/machine-combiner.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -disable-post-ra < %s | FileCheck %s
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
diff --git a/test/CodeGen/AArch64/machine-copy-remove.ll b/test/CodeGen/AArch64/machine-copy-remove.ll
new file mode 100644
index 000000000000..75954f83c19c
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-copy-remove.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: f_XX:
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov x[[REG]], xzr
+define i64 @f_XX(i64 %n, i64* nocapture readonly %P) {
+entry:
+ %tobool = icmp eq i64 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i64, i64* %P
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
+ ret i64 %a.0
+}
+
+; CHECK-LABEL: f_WW:
+; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov w[[REG]], wzr
+define i32 @f_WW(i32 %n, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %P
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %0, %if.then ], [ 0, %entry ]
+ ret i32 %a.0
+}
+
+; CHECK-LABEL: f_XW:
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov w[[REG]], wzr
+define i32 @f_XW(i64 %n, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp eq i64 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %P
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %0, %if.then ], [ 0, %entry ]
+ ret i32 %a.0
+}
+
+; CHECK-LABEL: f_WX:
+; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK: mov x[[REG]], xzr
+; Do not remove the mov in this case because we do not know if the upper bits
+; of the X register are zero.
+define i64 @f_WX(i32 %n, i64* nocapture readonly %P) {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i64, i64* %P
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
+ ret i64 %a.0
+}
+
+; CHECK-LABEL: test_superreg:
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK: str x[[REG]], [x1]
+; CHECK-NOT: mov w[[REG]], wzr
+; Because we returned w0 but x0 was marked live-in to the block, we didn't
+; remove the <kill> on the str leading to a verification failure.
+define i32 @test_superreg(i64 %in, i64* %dest) {
+ %tst = icmp eq i64 %in, 0
+ br i1 %tst, label %true, label %false
+
+false:
+ ret i32 42
+
+true:
+ store volatile i64 %in, i64* %dest
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll
new file mode 100644
index 000000000000..c68cee91a3cf
--- /dev/null
+++ b/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53
+
+; PR26827 - Merge stores causes wrong dependency.
+%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* }
+@gv0 = internal unnamed_addr global i32 0, align 4
+@gv1 = internal unnamed_addr global %struct1** null, align 8
+
+define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg) {
+;CHECK-LABEL: test
+entry:
+; A53: mov [[DATA:w[0-9]+]], w1
+; A53: str q{{[0-9]+}}, {{.*}}
+; A53: str q{{[0-9]+}}, {{.*}}
+; A53: str [[DATA]], {{.*}}
+
+ %0 = bitcast %struct1* %fde to i8*
+ tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false)
+ %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
+ store i16 256, i16* %state, align 8
+ %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
+ store i32 %fd, i32* %fd1, align 8
+ %force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3
+ store i32 0, i32* %force_eof, align 4
+ %func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6
+ store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8
+ %arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7
+ store i8* %arg, i8** %arg3, align 8
+ %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, i8* %0) #6
+ %1 = load i32, i32* %fd1, align 8
+ %cmp.i = icmp slt i32 %1, 0
+ br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
+if.then.i:
+ unreachable
+
+while.body.i.preheader:
+ %2 = load i32, i32* @gv0, align 4
+ %3 = icmp eq i32* %fd1, @gv0
+ br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader
+
+while.body.i.split.ver.us.preheader:
+ br label %while.body.i.split.ver.us
+
+while.body.i.split.ver.us:
+ %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ]
+ %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
+ %4 = icmp sgt i32 %mul.i.ver.us, %1
+ br i1 %4, label %while.end.i, label %while.body.i.split.ver.us
+
+while.body.i.split:
+ br label %while.body.i.split
+
+while.end.i:
+ %call.i = tail call i8* @foo()
+ store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8
+ br label %exit
+
+exit:
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare i32 @fcntl(i32, i32, ...)
+declare noalias i8* @foo()
diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll
index 86f5edd5da1d..981d16f762ff 100644
--- a/test/CodeGen/AArch64/merge-store.ll
+++ b/test/CodeGen/AArch64/merge-store.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march aarch64 %s -o - | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE
+; RUN: llc -mtriple=aarch64-unknown-unknown %s -mcpu=cyclone -o - | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK
+; RUN: llc -march aarch64 %s -mattr=-slow-misaligned-128store -o - | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK
@g0 = external global <3 x float>, align 16
@g1 = external global <3 x float>, align 4
@@ -38,9 +38,12 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
store <2 x float> %shuffle1, <2 x float>* %idx1, align 8
ret void
-; CHECK-LABEL: merge_vec_extract_stores
-; CHECK: stur q0, [x0, #24]
-; CHECK-NEXT: ret
+; MISALIGNED-LABEL: merge_vec_extract_stores
+; MISALIGNED: stur q0, [x0, #24]
+; MISALIGNED-NEXT: ret
+
+; FIXME: Ideally we would like to use a generic target for this test, but this relies
+; on suppressing store pairs.
; CYCLONE-LABEL: merge_vec_extract_stores
; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8
diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll
index d38869329034..0f4c0ac84ce5 100644
--- a/test/CodeGen/AArch64/misched-fusion.ll
+++ b/test/CodeGen/AArch64/misched-fusion.ll
@@ -1,4 +1,6 @@
+; RUN: llc -o - %s -mattr=+macroop-fusion,+use-postra-scheduler | FileCheck %s
; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
+
target triple = "arm64-apple-ios"
declare void @foobar(i32 %v0, i32 %v1)
@@ -8,12 +10,12 @@ declare void @foobar(i32 %v0, i32 %v1)
; CHECK: add w[[ADDRES:[0-9]+]], w1, #7
; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13
; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]]
-; CHECK: mov x0, x[[ADDRES]]
-; CHECK: mov x1, x[[SUBRES]]
+; CHECK: mov [[REGTY:[x,w]]]0, [[REGTY]][[ADDRES]]
+; CHECK: mov [[REGTY]]1, [[REGTY]][[SUBRES]]
; CHECK: bl _foobar
; CHECK: [[SKIPBLOCK]]:
-; CHECK: mov x0, x[[SUBRES]]
-; CHECK: mov x1, x[[ADDRES]]
+; CHECK: mov [[REGTY]]0, [[REGTY]][[SUBRES]]
+; CHECK: mov [[REGTY]]1, [[REGTY]][[ADDRES]]
; CHECK: bl _foobar
define void @test_sub_cbz(i32 %a0, i32 %a1) {
entry:
diff --git a/test/CodeGen/AArch64/movimm-wzr.mir b/test/CodeGen/AArch64/movimm-wzr.mir
new file mode 100644
index 000000000000..d54e7bef54cd
--- /dev/null
+++ b/test/CodeGen/AArch64/movimm-wzr.mir
@@ -0,0 +1,46 @@
+# RUN: llc -run-pass=aarch64-expand-pseudo %s -o - 2>&1 | FileCheck %s
+
+--- |
+ ; ModuleID = 'simple.ll'
+ source_filename = "simple.ll"
+ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64--linux-gnu"
+
+ define i32 @test_mov_0() {
+ ret i32 42
+ }
+
+...
+---
+name: test_mov_0
+alignment: 2
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: false
+tracksSubRegLiveness: false
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %wzr = MOVi32imm 42
+ %xzr = MOVi64imm 42
+ RET_ReallyLR implicit killed %w0
+
+...
+
+# CHECK: bb.0
+# CHECK-NEXT: RET %lr
diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll
index 93c181271755..def6072e0bca 100644
--- a/test/CodeGen/AArch64/movw-consts.ll
+++ b/test/CodeGen/AArch64/movw-consts.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i64 @test0() {
; CHECK-LABEL: test0:
@@ -53,19 +53,19 @@ define i64 @test7() {
; couldn't. Useful even for i64
define i64 @test8() {
; CHECK-LABEL: test8:
-; CHECK: movn w0, #{{60875|0xedcb}}
+; CHECK: mov w0, #-60876
ret i64 4294906420
}
define i64 @test9() {
; CHECK-LABEL: test9:
-; CHECK: movn x0, #0
+; CHECK: mov x0, #-1
ret i64 -1
}
define i64 @test10() {
; CHECK-LABEL: test10:
-; CHECK: movn x0, #{{60875|0xedcb}}, lsl #16
+; CHECK: mov x0, #-3989504001
ret i64 18446744069720047615
}
@@ -110,7 +110,7 @@ define void @test15() {
define void @test16() {
; CHECK-LABEL: test16:
-; CHECK: movn {{w[0-9]+}}, #0
+; CHECK: mov {{w[0-9]+}}, #-1
store i32 -1, i32* @var32
ret void
}
diff --git a/test/CodeGen/AArch64/neg-imm.ll b/test/CodeGen/AArch64/neg-imm.ll
new file mode 100644
index 000000000000..375d3dbfd0d5
--- /dev/null
+++ b/test/CodeGen/AArch64/neg-imm.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; LSR used to pick a sub-optimal solution due to the target responding
+; conservatively to isLegalAddImmediate for negative values.
+
+declare void @foo(i32)
+
+define void @test(i32 %px) {
+; CHECK_LABEL: test:
+; CHECK_LABEL: %entry
+; CHECK: subs
+; CHECK-NEXT: csel
+entry:
+ %sub = add nsw i32 %px, -1
+ %cmp = icmp slt i32 %px, 1
+ %.sub = select i1 %cmp, i32 0, i32 %sub
+ br label %for.body
+
+for.body:
+; CHECK_LABEL: %for.body
+; CHECK: cmp
+; CHECK-NEXT: b.eq
+; CHECK-LABEL: %if.then3
+ %x.015 = phi i32 [ %inc, %for.inc ], [ %.sub, %entry ]
+ %cmp2 = icmp eq i32 %x.015, %px
+ br i1 %cmp2, label %for.inc, label %if.then3
+
+if.then3:
+ tail call void @foo(i32 %x.015)
+ br label %for.inc
+
+for.inc:
+; CHECK_LABEL: %for.inc
+; CHECK: add
+; CHECK-NEXT: cmp
+; CHECK: b.le
+; CHECK_LABEL: %for.cond.cleanup
+ %inc = add nsw i32 %x.015, 1
+ %cmp1 = icmp sgt i32 %x.015, %px
+ br i1 %cmp1, label %for.cond.cleanup.loopexit, label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll
index 6d89dfbacf41..887cb5dd698a 100644
--- a/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -802,6 +802,63 @@ define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
}
+define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) {
+; CHECK-LABEL: cmgez8xi8_alt:
+; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
+ %sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %not = xor <8 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <8 x i8> %not
+}
+
+define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) {
+; CHECK-LABEL: cmgez16xi8_alt:
+; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
+ %sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <16 x i8> %not
+}
+
+define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) {
+; CHECK-LABEL: cmgez4xi16_alt:
+; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
+ %sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15>
+ %not = xor <4 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <4 x i16> %not
+}
+
+define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) {
+; CHECK-LABEL: cmgez8xi16_alt:
+; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
+ %sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <8 x i16> %not
+}
+
+define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) {
+; CHECK-LABEL: cmgez2xi32_alt:
+; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
+ %sign = ashr <2 x i32> %A, <i32 31, i32 31>
+ %not = xor <2 x i32> %sign, <i32 -1, i32 -1>
+ ret <2 x i32> %not
+}
+
+define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) {
+; CHECK-LABEL: cmgez4xi32_alt:
+; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
+ %sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31>
+ %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %not
+}
+
+define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) {
+; CHECK-LABEL: cmgez2xi64_alt:
+; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
+ %sign = ashr <2 x i64> %A, <i64 63, i64 63>
+ %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
+ ret <2 x i64> %not
+}
+
+
define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
; CHECK-LABEL: cmgtz8xi8:
; CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll
index 40649aeb1b8e..7882f5189413 100644
--- a/test/CodeGen/AArch64/neon-mov.ll
+++ b/test/CodeGen/AArch64/neon-mov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @movi8b() {
; CHECK-LABEL: movi8b:
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
index 41e391dcd76c..b9914356f301 100644
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
%struct.int16x4x2_t = type { [2 x <4 x i16>] }
diff --git a/test/CodeGen/AArch64/no-quad-ldp-stp.ll b/test/CodeGen/AArch64/no-quad-ldp-stp.ll
new file mode 100644
index 000000000000..19d371adbdf0
--- /dev/null
+++ b/test/CodeGen/AArch64/no-quad-ldp-stp.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=aarch64 -mattr=+no-quad-ldst-pairs -verify-machineinstrs -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=aarch64 -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s
+
+; CHECK-LABEL: test_nopair_st
+; CHECK: str
+; CHECK: stur
+; CHECK-NOT: stp
+define void @test_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) {
+ %tmp1 = bitcast double* %ptr to <2 x double>*
+ store <2 x double> %v2, <2 x double>* %tmp1, align 16
+ %add.ptr = getelementptr inbounds double, double* %ptr, i64 -2
+ %tmp = bitcast double* %add.ptr to <2 x double>*
+ store <2 x double> %v1, <2 x double>* %tmp, align 16
+ ret void
+}
+
+; CHECK-LABEL: test_nopair_ld
+; CHECK: ldr
+; CHECK: ldr
+; CHECK-NOT: ldp
+define <2 x i64> @test_nopair_ld(i64* %p) {
+ %a1 = bitcast i64* %p to <2 x i64>*
+ %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8
+ %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 2
+ %a2 = bitcast i64* %add.ptr2 to <2 x i64>*
+ %tmp2 = load <2 x i64>, <2 x i64>* %a2, align 8
+ %add = add nsw <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %add
+}
diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll
index db9779e03190..d8785f845c29 100644
--- a/test/CodeGen/AArch64/nontemporal.ll
+++ b/test/CodeGen/AArch64/nontemporal.ll
@@ -112,7 +112,7 @@ define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
define void @test_stnp_i64(i64* %p, i64 %v) #0 {
; CHECK-LABEL: test_stnp_i64:
-; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
; CHECK-NEXT: stnp w1, w[[HI]], [x0]
; CHECK-NEXT: ret
store i64 %v, i64* %p, align 1, !nontemporal !0
@@ -162,7 +162,7 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
; CHECK-LABEL: test_stnp_i64_offset:
-; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8]
; CHECK-NEXT: ret
%tmp0 = getelementptr i64, i64* %p, i32 1
@@ -172,7 +172,7 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
; CHECK-LABEL: test_stnp_i64_offset_neg:
-; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8]
; CHECK-NEXT: ret
%tmp0 = getelementptr i64, i64* %p, i32 -1
diff --git a/test/CodeGen/AArch64/nzcv-save.ll b/test/CodeGen/AArch64/nzcv-save.ll
index f8f42ec9b1a9..9329f3962934 100644
--- a/test/CodeGen/AArch64/nzcv-save.ll
+++ b/test/CodeGen/AArch64/nzcv-save.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=aarch64 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march=aarch64 < %s | FileCheck %s
; CHECK: mrs [[NZCV_SAVE:x[0-9]+]], NZCV
; CHECK: msr NZCV, [[NZCV_SAVE]]
diff --git a/test/CodeGen/AArch64/optimize-cond-branch.ll b/test/CodeGen/AArch64/optimize-cond-branch.ll
new file mode 100644
index 000000000000..4e3ca6f16e78
--- /dev/null
+++ b/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -0,0 +1,48 @@
+; RUN: llc -verify-machineinstrs -o - %s | FileCheck %s
+target triple = "arm64--"
+
+; AArch64InstrInfo::optimizeCondBranch() optimizes the
+; "x = and y, 256; cmp x, 0; br" from an "and; cbnz" to a tbnz instruction.
+; It forgot to clear the a flag resulting in a MachineVerifier complaint.
+;
+; Writing a stable/simple test is tricky since most tbz instructions are already
+; formed in SelectionDAG, optimizeCondBranch() only triggers if the and
+; instruction is in a different block than the conditional jump.
+;
+; CHECK-LABEL: func
+; CHECK-NOT: and
+; CHECK: tbnz
+define void @func() {
+ %c0 = icmp sgt i64 0, 0
+ br i1 %c0, label %b1, label %b6
+
+b1:
+ br i1 undef, label %b3, label %b2
+
+b2:
+ %v0 = tail call i32 @extfunc()
+ br label %b5
+
+b3:
+ %v1 = load i32, i32* undef, align 4
+ %v2 = and i32 %v1, 256
+ br label %b5
+
+b5:
+ %v3 = phi i32 [ %v2, %b3 ], [ %v0, %b2 ]
+ %c1 = icmp eq i32 %v3, 0
+ br i1 %c1, label %b8, label %b7
+
+b6:
+ tail call i32 @extfunc()
+ ret void
+
+b7:
+ tail call i32 @extfunc()
+ ret void
+
+b8:
+ ret void
+}
+
+declare i32 @extfunc()
diff --git a/test/CodeGen/AArch64/pie.ll b/test/CodeGen/AArch64/pie.ll
new file mode 100644
index 000000000000..5cd27a8761cc
--- /dev/null
+++ b/test/CodeGen/AArch64/pie.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple aarch64-pc-linux -relocation-model=pic < %s | FileCheck %s
+
+@g1 = global i32 42
+
+define i32* @get_g1() {
+; CHECK: get_g1:
+; CHECK: adrp x0, g1
+; CHECK-NEXT: add x0, x0, :lo12:g1
+ ret i32* @g1
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"PIE Level", i32 2}
diff --git a/test/CodeGen/AArch64/preferred-alignment.ll b/test/CodeGen/AArch64/preferred-alignment.ll
new file mode 100644
index 000000000000..c032e83d268f
--- /dev/null
+++ b/test/CodeGen/AArch64/preferred-alignment.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=aarch64 -O0 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define i32 @foo() #0 {
+entry:
+ %c = alloca i8, align 1
+; CHECK: add x0, sp, #12
+ %s = alloca i16, align 2
+; CHECK-NEXT: add x1, sp, #8
+ %i = alloca i32, align 4
+; CHECK-NEXT: add x2, sp, #4
+ %call = call i32 @bar(i8* %c, i16* %s, i32* %i)
+ %0 = load i8, i8* %c, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %call, %conv
+ %1 = load i16, i16* %s, align 2
+ %conv1 = sext i16 %1 to i32
+ %add2 = add nsw i32 %add, %conv1
+ %2 = load i32, i32* %i, align 4
+ %add3 = add nsw i32 %add2, %2
+ ret i32 %add3
+}
+
+declare i32 @bar(i8*, i16*, i32*) #1
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
+attributes #1 = { "no-frame-pointer-elim"="false" }
+
diff --git a/test/CodeGen/AArch64/preserve_mostcc.ll b/test/CodeGen/AArch64/preserve_mostcc.ll
new file mode 100644
index 000000000000..7f0968c8eb33
--- /dev/null
+++ b/test/CodeGen/AArch64/preserve_mostcc.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios-8.0.0 | FileCheck %s
+
+declare void @standard_cc_func()
+declare preserve_mostcc void @preserve_mostcc_func()
+
+; Registers r9-r15 should be saved before the call of a function
+; with a standard calling convention.
+define preserve_mostcc void @preserve_mostcc1() nounwind {
+entry:
+;CHECK-LABEL: preserve_mostcc1
+;CHECK-NOT: stp
+;CHECK-NOT: str
+;CHECK: str x15
+;CHECK-NEXT: stp x14, x13,
+;CHECK-NEXT: stp x12, x11,
+;CHECK-NEXT: stp x10, x9,
+;CHECK: bl _standard_cc_func
+ call void @standard_cc_func()
+;CHECK: ldp x10, x9,
+;CHECK-NEXT: ldp x12, x11,
+;CHECK-NEXT: ldp x14, x13,
+;CHECK-NEXT: ldr x15
+ ret void
+}
+
+; Registers r9-r15 don't need to be saved if one
+; function with preserve_mostcc calling convention calls another
+; function with preserve_mostcc calling convention, because the
+; callee wil save these registers anyways.
+define preserve_mostcc void @preserve_mostcc2() nounwind {
+entry:
+;CHECK-LABEL: preserve_mostcc2
+;CHECK-NOT: x14
+;CHECK: stp x29, x30,
+;CHECK-NOT: x14
+;CHECK: bl _preserve_mostcc_func
+ call preserve_mostcc void @preserve_mostcc_func()
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/recp-fastmath.ll b/test/CodeGen/AArch64/recp-fastmath.ll
new file mode 100644
index 000000000000..710739b2cc5f
--- /dev/null
+++ b/test/CodeGen/AArch64/recp-fastmath.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!div,!vec-div | FileCheck %s --check-prefix=FAULT
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=div,vec-div | FileCheck %s
+
+define float @frecp(float %x) #0 {
+ %div = fdiv fast float 1.0, %x
+ ret float %div
+
+; FAULT-LABEL: frecp:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fmov
+; FAULT-NEXT: fdiv
+
+; CHECK-LABEL: frecp:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe
+; CHECK-NEXT: fmov
+}
+
+define <2 x float> @f2recp(<2 x float> %x) #0 {
+ %div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
+ ret <2 x float> %div
+
+; FAULT-LABEL: f2recp:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fmov
+; FAULT-NEXT: fdiv
+
+; CHECK-LABEL: f2recp:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frecpe
+}
+
+define <4 x float> @f4recp(<4 x float> %x) #0 {
+ %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+ ret <4 x float> %div
+
+; FAULT-LABEL: f4recp:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fmov
+; FAULT-NEXT: fdiv
+
+; CHECK-LABEL: f4recp:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frecpe
+}
+
+define double @drecp(double %x) #0 {
+ %div = fdiv fast double 1.0, %x
+ ret double %div
+
+; FAULT-LABEL: drecp:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fmov
+; FAULT-NEXT: fdiv
+
+; CHECK-LABEL: drecp:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: frecpe
+; CHECK-NEXT: fmov
+}
+
+define <2 x double> @d2recp(<2 x double> %x) #0 {
+ %div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
+ ret <2 x double> %div
+
+; FAULT-LABEL: d2recp:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fmov
+; FAULT-NEXT: fdiv
+
+; CHECK-LABEL: d2recp:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frecpe
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index ba34873eaa5b..4bec512403c4 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -disable-fp-elim -o - %s | FileCheck %s
; When generating DAG selection tables, TableGen used to only flag an
; instruction as needing a chain on its own account if it had a built-in pattern
diff --git a/test/CodeGen/AArch64/rem_crash.ll b/test/CodeGen/AArch64/rem_crash.ll
new file mode 100644
index 000000000000..71f1a80e24e2
--- /dev/null
+++ b/test/CodeGen/AArch64/rem_crash.ll
@@ -0,0 +1,257 @@
+; RUN: llc < %s -march=aarch64
+
+define i8 @test_minsize_uu8(i8 %x) minsize optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_minsize_ss8(i8 %x) minsize optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_minsize_us8(i8 %x) minsize optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_minsize_su8(i8 %x) minsize optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i16 @test_minsize_uu16(i16 %x) minsize optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_minsize_ss16(i16 %x) minsize optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_minsize_us16(i16 %x) minsize optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_minsize_su16(i16 %x) minsize optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i32 @test_minsize_uu32(i32 %x) minsize optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_minsize_ss32(i32 %x) minsize optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_minsize_us32(i32 %x) minsize optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_minsize_su32(i32 %x) minsize optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i64 @test_minsize_uu64(i64 %x) minsize optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_minsize_ss64(i64 %x) minsize optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_minsize_us64(i64 %x) minsize optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_minsize_su64(i64 %x) minsize optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i8 @test_uu8(i8 %x) optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_ss8(i8 %x) optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_us8(i8 %x) optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_su8(i8 %x) optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i16 @test_uu16(i16 %x) optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_ss16(i16 %x) optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_us16(i16 %x) optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_su16(i16 %x) optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i32 @test_uu32(i32 %x) optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_ss32(i32 %x) optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_us32(i32 %x) optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_su32(i32 %x) optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i64 @test_uu64(i64 %x) optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_ss64(i64 %x) optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_us64(i64 %x) optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_su64(i64 %x) optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index c2721e70190a..b2ca1cca0812 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -2,7 +2,11 @@
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a73 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m1 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
%X = type { i64, i64, i64 }
declare void @f(%X*)
@@ -11,9 +15,11 @@ entry:
%tmp = alloca %X
call void @f(%X* %tmp)
; CHECK: add x0, sp, #8
-; CHECK-NEXT-NOT: mov
+; CHECK-NOT: mov
+; CHECK-NEXT: bl f
call void @f(%X* %tmp)
; CHECK: add x0, sp, #8
-; CHECK-NEXT-NOT: mov
+; CHECK-NOT: mov
+; CHECK-NEXT: bl f
ret void
}
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
index a68fdec4cfbc..925d1881f563 100644
--- a/test/CodeGen/AArch64/sibling-call.ll
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -92,6 +92,6 @@ define void @indirect_tail() {
tail call void %fptr(i32 42)
ret void
; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:func]
-; CHECK: movz w0, #{{42|0x2a}}
+; CHECK: mov w0, #{{42|0x2a}}
; CHECK: br [[FPTR]]
}
diff --git a/test/CodeGen/AArch64/special-reg.ll b/test/CodeGen/AArch64/special-reg.ll
index 91c32158d420..4b8c75b70985 100644
--- a/test/CodeGen/AArch64/special-reg.ll
+++ b/test/CodeGen/AArch64/special-reg.ll
@@ -35,7 +35,7 @@ entry:
define void @write_daifset() nounwind {
entry:
; CHECK-LABEL: write_daifset:
-; CHECK: msr DAIFSET, #2
+; CHECK: msr DAIFSet, #2
call void @llvm.write_register.i64(metadata !2, i64 2)
ret void
}
diff --git a/test/CodeGen/AArch64/sqrt-fastmath.ll b/test/CodeGen/AArch64/sqrt-fastmath.ll
new file mode 100644
index 000000000000..0d9533fd27fc
--- /dev/null
+++ b/test/CodeGen/AArch64/sqrt-fastmath.ll
@@ -0,0 +1,160 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt | FileCheck %s --check-prefix=FAULT
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon,-use-reverse-square-root | FileCheck %s --check-prefix=FAULT
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon,+use-reverse-square-root | FileCheck %s
+
+declare float @llvm.sqrt.f32(float) #1
+declare double @llvm.sqrt.f64(double) #1
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #1
+
+define float @fsqrt(float %a) #0 {
+ %1 = tail call fast float @llvm.sqrt.f32(float %a)
+ ret float %1
+
+; FAULT-LABEL: fsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: fsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+define <2 x float> @f2sqrt(<2 x float> %a) #0 {
+ %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
+ ret <2 x float> %1
+
+; FAULT-LABEL: f2sqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: f2sqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: mov
+; CHECK-NEXT: frsqrte
+}
+
+define <4 x float> @f4sqrt(<4 x float> %a) #0 {
+ %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
+ ret <4 x float> %1
+
+; FAULT-LABEL: f4sqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: f4sqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: mov
+; CHECK-NEXT: frsqrte
+}
+
+define double @dsqrt(double %a) #0 {
+ %1 = tail call fast double @llvm.sqrt.f64(double %a)
+ ret double %1
+
+; FAULT-LABEL: dsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: dsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+define <2 x double> @d2sqrt(<2 x double> %a) #0 {
+ %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
+ ret <2 x double> %1
+
+; FAULT-LABEL: d2sqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: d2sqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: mov
+; CHECK-NEXT: frsqrte
+}
+
+define float @frsqrt(float %a) #0 {
+ %1 = tail call fast float @llvm.sqrt.f32(float %a)
+ %2 = fdiv fast float 1.000000e+00, %1
+ ret float %2
+
+; FAULT-LABEL: frsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: frsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
+ %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
+ %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
+ ret <2 x float> %2
+
+; FAULT-LABEL: f2rsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: f2rsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
+ %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
+ %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
+ ret <4 x float> %2
+
+; FAULT-LABEL: f4rsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: f4rsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+define double @drsqrt(double %a) #0 {
+ %1 = tail call fast double @llvm.sqrt.f64(double %a)
+ %2 = fdiv fast double 1.000000e+00, %1
+ ret double %2
+
+; FAULT-LABEL: drsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: drsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
+ %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
+ %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
+ ret <2 x double> %2
+
+; FAULT-LABEL: d2rsqrt:
+; FAULT-NEXT: BB#0
+; FAULT-NEXT: fsqrt
+
+; CHECK-LABEL: d2rsqrt:
+; CHECK-NEXT: BB#0
+; CHECK-NEXT: fmov
+; CHECK-NEXT: frsqrte
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
index eb4937e75f61..6f1515a98264 100644
--- a/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
+++ b/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=pic -disable-fp-elim | FileCheck %s
@__stack_chk_guard = external global i64*
@@ -6,10 +6,14 @@
; CHECK: adrp [[R0:x[0-9]+]], ___stack_chk_guard@GOTPAGE
; CHECK: ldr [[R1:x[0-9]+]], {{\[}}[[R0]], ___stack_chk_guard@GOTPAGEOFF{{\]}}
+; Load the stack guard for the second time, just in case the previous value gets spilled.
+; CHECK: adrp [[GUARD_PAGE:x[0-9]+]], ___stack_chk_guard@GOTPAGE
; CHECK: ldr [[R2:x[0-9]+]], {{\[}}[[R1]]{{\]}}
; CHECK: stur [[R2]], {{\[}}x29, [[SLOT0:[0-9#\-]+]]{{\]}}
; CHECK: ldur [[R3:x[0-9]+]], {{\[}}x29, [[SLOT0]]{{\]}}
-; CHECK: sub [[R4:x[0-9]+]], [[R2]], [[R3]]
+; CHECK: ldr [[GUARD_ADDR:x[0-9]+]], {{\[}}[[GUARD_PAGE]], ___stack_chk_guard@GOTPAGEOFF{{\]}}
+; CHECK: ldr [[GUARD:x[0-9]+]], {{\[}}[[GUARD_ADDR]]{{\]}}
+; CHECK: sub [[R4:x[0-9]+]], [[GUARD]], [[R3]]
; CHECK: cbnz [[R4]], LBB
define i32 @test_stack_guard_remat2() {
diff --git a/test/CodeGen/AArch64/stack-protector-target.ll b/test/CodeGen/AArch64/stack-protector-target.ll
new file mode 100644
index 000000000000..d4d806289bff
--- /dev/null
+++ b/test/CodeGen/AArch64/stack-protector-target.ll
@@ -0,0 +1,19 @@
+; Test target-specific stack cookie location.
+; RUN: llc -mtriple=aarch64-linux-android < %s -o - | FileCheck --check-prefix=ANDROID-AARCH64 %s
+
+define void @_Z1fv() sspreq {
+entry:
+ %x = alloca i32, align 4
+ %0 = bitcast i32* %x to i8*
+ call void @_Z7CapturePi(i32* nonnull %x)
+ ret void
+}
+
+declare void @_Z7CapturePi(i32*)
+
+; ANDROID-AARCH64: mrs [[A:.*]], TPIDR_EL0
+; ANDROID-AARCH64: ldr [[B:.*]], {{\[}}[[A]], #40]
+; ANDROID-AARCH64: str [[B]], [sp,
+; ANDROID-AARCH64: ldr [[C:.*]], {{\[}}[[A]], #40]
+; ANDROID-AARCH64: ldr [[D:.*]], [sp,
+; ANDROID-AARCH64: cmp [[C]], [[D]]
diff --git a/test/CodeGen/AArch64/stackmap-frame-setup.ll b/test/CodeGen/AArch64/stackmap-frame-setup.ll
index 4712012b0d25..5646703fa403 100644
--- a/test/CodeGen/AArch64/stackmap-frame-setup.ll
+++ b/test/CodeGen/AArch64/stackmap-frame-setup.ll
@@ -1,5 +1,5 @@
-; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL
-; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL
+; RUN: llc -o - -verify-machineinstrs -mtriple=aarch64-apple-darwin -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL
+; RUN: llc -o - -verify-machineinstrs -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL
define void @caller_meta_leaf() {
entry:
diff --git a/test/CodeGen/AArch64/stackmap-liveness.ll b/test/CodeGen/AArch64/stackmap-liveness.ll
index 6b37aac16f9e..224a9c418526 100644
--- a/test/CodeGen/AArch64/stackmap-liveness.ll
+++ b/test/CodeGen/AArch64/stackmap-liveness.ll
@@ -37,7 +37,7 @@ define i64 @stackmap_liveness(i1 %c) {
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 8
; Align
-; CHECK-NEXT: .align 3
+; CHECK-NEXT: .p2align 3
%1 = select i1 %c, i64 1, i64 2
call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 32, i8* null, i32 0)
ret i64 %1
diff --git a/test/CodeGen/AArch64/subs-to-sub-opt.ll b/test/CodeGen/AArch64/subs-to-sub-opt.ll
new file mode 100644
index 000000000000..f33e24e777fe
--- /dev/null
+++ b/test/CodeGen/AArch64/subs-to-sub-opt.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O3 -o - %s | FileCheck %s
+
+@a = external global i8, align 1
+@b = external global i8, align 1
+
+; Test that SUBS is replaced by SUB if condition flags are not used.
+define i32 @test01() nounwind {
+; CHECK: ldrb {{.*}}
+; CHECK-NEXT: ldrb {{.*}}
+; CHECK-NEXT: sub {{.*}}
+; CHECK-NEXT: cmn {{.*}}
+entry:
+ %0 = load i8, i8* @a, align 1
+ %conv = zext i8 %0 to i32
+ %1 = load i8, i8* @b, align 1
+ %conv1 = zext i8 %1 to i32
+ %s = sub nsw i32 %conv1, %conv
+ %cmp0 = icmp eq i32 %s, -1
+ %cmp1 = sext i1 %cmp0 to i8
+ store i8 %cmp1, i8* @a
+ ret i32 0
+}
+
diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll
new file mode 100644
index 000000000000..a0bfffdef95e
--- /dev/null
+++ b/test/CodeGen/AArch64/swifterror.ll
@@ -0,0 +1,385 @@
+; RUN: llc -verify-machineinstrs -disable-fp-elim < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-APPLE %s
+; RUN: llc -verify-machineinstrs -disable-fp-elim -O0 < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-O0 %s
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+%swift_error = type {i64, i8}
+
+; This tests the basic usage of a swifterror parameter. "foo" is the function
+; that takes a swifterror parameter and "caller" is the caller of "foo".
+define float @foo(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: foo:
+; CHECK-APPLE: orr w0, wzr, #0x10
+; CHECK-APPLE: malloc
+; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-APPLE: strb [[ID]], [x0, #8]
+; CHECK-APPLE: mov x19, x0
+; CHECK-APPLE-NOT: x19
+
+; CHECK-O0-LABEL: foo:
+; CHECK-O0: orr w{{.*}}, wzr, #0x10
+; CHECK-O0: malloc
+; CHECK-O0: mov [[ID2:x[0-9]+]], x0
+; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-O0: strb [[ID]], [x0, #8]
+; CHECK-O0: mov x19, [[ID2]]
+; CHECK-O0-NOT: x19
+entry:
+ %call = call i8* @malloc(i64 16)
+ %call.0 = bitcast i8* %call to %swift_error*
+ store %swift_error* %call.0, %swift_error** %error_ptr_ref
+ %tmp = getelementptr inbounds i8, i8* %call, i64 8
+ store i8 1, i8* %tmp
+ ret float 1.0
+}
+
+; "caller" calls "foo" that takes a swifterror parameter.
+define float @caller(i8* %error_ref) {
+; CHECK-APPLE-LABEL: caller:
+; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
+; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: bl {{.*}}foo
+; CHECK-APPLE: cbnz x19
+; Access part of the error object and save it to error_ref
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: bl {{.*}}free
+
+; CHECK-O0-LABEL: caller:
+; CHECK-O0: mov x19
+; CHECK-O0: bl {{.*}}foo
+; CHECK-O0: mov [[ID:x[0-9]+]], x19
+; CHECK-O0: cbnz [[ID]]
+entry:
+ %error_ptr_ref = alloca swifterror %swift_error*
+ store %swift_error* null, %swift_error** %error_ptr_ref
+ %call = call float @foo(%swift_error** swifterror %error_ptr_ref)
+ %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+ %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+ %tmp = bitcast %swift_error* %error_from_foo to i8*
+ br i1 %had_error_from_foo, label %handler, label %cont
+cont:
+ %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+ %t = load i8, i8* %v1
+ store i8 %t, i8* %error_ref
+ br label %handler
+handler:
+ call void @free(i8* %tmp)
+ ret float 1.0
+}
+
+; "caller2" is the caller of "foo", it calls "foo" inside a loop.
+define float @caller2(i8* %error_ref) {
+; CHECK-APPLE-LABEL: caller2:
+; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
+; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0
+; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: bl {{.*}}foo
+; CHECK-APPLE: cbnz x19
+; CHECK-APPLE: fcmp s0, [[CMP]]
+; CHECK-APPLE: b.le
+; Access part of the error object and save it to error_ref
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: bl {{.*}}free
+
+; CHECK-O0-LABEL: caller2:
+; CHECK-O0: mov x19
+; CHECK-O0: bl {{.*}}foo
+; CHECK-O0: mov [[ID:x[0-9]+]], x19
+; CHECK-O0: cbnz [[ID]]
+entry:
+ %error_ptr_ref = alloca swifterror %swift_error*
+ br label %bb_loop
+bb_loop:
+ store %swift_error* null, %swift_error** %error_ptr_ref
+ %call = call float @foo(%swift_error** swifterror %error_ptr_ref)
+ %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+ %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+ %tmp = bitcast %swift_error* %error_from_foo to i8*
+ br i1 %had_error_from_foo, label %handler, label %cont
+cont:
+ %cmp = fcmp ogt float %call, 1.000000e+00
+ br i1 %cmp, label %bb_end, label %bb_loop
+bb_end:
+ %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+ %t = load i8, i8* %v1
+ store i8 %t, i8* %error_ref
+ br label %handler
+handler:
+ call void @free(i8* %tmp)
+ ret float 1.0
+}
+
+; "foo_if" is a function that takes a swifterror parameter, it sets swifterror
+; under a certain condition.
+define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
+; CHECK-APPLE-LABEL: foo_if:
+; CHECK-APPLE: cbz w0
+; CHECK-APPLE: orr w0, wzr, #0x10
+; CHECK-APPLE: malloc
+; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-APPLE: strb [[ID]], [x0, #8]
+; CHECK-APPLE: mov x19, x0
+; CHECK-APPLE-NOT: x19
+; CHECK-APPLE: ret
+
+; CHECK-O0-LABEL: foo_if:
+; spill x19
+; CHECK-O0: str x19
+; CHECK-O0: cbz w0
+; CHECK-O0: orr w{{.*}}, wzr, #0x10
+; CHECK-O0: malloc
+; CHECK-O0: mov [[ID:x[0-9]+]], x0
+; CHECK-O0: orr [[ID2:w[0-9]+]], wzr, #0x1
+; CHECK-O0: strb [[ID2]], [x0, #8]
+; CHECK-O0: mov x19, [[ID]]
+; CHECK-O0: ret
+; reload from stack
+; CHECK-O0: ldr x19
+; CHECK-O0: ret
+entry:
+ %cond = icmp ne i32 %cc, 0
+ br i1 %cond, label %gen_error, label %normal
+
+gen_error:
+ %call = call i8* @malloc(i64 16)
+ %call.0 = bitcast i8* %call to %swift_error*
+ store %swift_error* %call.0, %swift_error** %error_ptr_ref
+ %tmp = getelementptr inbounds i8, i8* %call, i64 8
+ store i8 1, i8* %tmp
+ ret float 1.0
+
+normal:
+ ret float 0.0
+}
+
+; "foo_loop" is a function that takes a swifterror parameter, it sets swifterror
+; under a certain condition inside a loop.
+define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
+; CHECK-APPLE-LABEL: foo_loop:
+; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: cbz
+; CHECK-APPLE: orr w0, wzr, #0x10
+; CHECK-APPLE: malloc
+; CHECK-APPLE: strb w{{.*}}, [x0, #8]
+; CHECK-APPLE: fcmp
+; CHECK-APPLE: b.le
+; CHECK-APPLE: mov x19, x0
+; CHECK-APPLE: ret
+
+; CHECK-O0-LABEL: foo_loop:
+; spill x19
+; CHECK-O0: str x19
+; CHECK-O0: cbz
+; CHECK-O0: orr w{{.*}}, wzr, #0x10
+; CHECK-O0: malloc
+; CHECK-O0: mov [[ID:x[0-9]+]], x0
+; CHECK-O0: strb w{{.*}}, [{{.*}}[[ID]], #8]
+; spill x0
+; CHECK-O0: str x0
+; CHECK-O0: fcmp
+; CHECK-O0: b.le
+; reload from stack
+; CHECK-O0: ldr x19
+; CHECK-O0: ret
+entry:
+ br label %bb_loop
+
+bb_loop:
+ %cond = icmp ne i32 %cc, 0
+ br i1 %cond, label %gen_error, label %bb_cont
+
+gen_error:
+ %call = call i8* @malloc(i64 16)
+ %call.0 = bitcast i8* %call to %swift_error*
+ store %swift_error* %call.0, %swift_error** %error_ptr_ref
+ %tmp = getelementptr inbounds i8, i8* %call, i64 8
+ store i8 1, i8* %tmp
+ br label %bb_cont
+
+bb_cont:
+ %cmp = fcmp ogt float %cc2, 1.000000e+00
+ br i1 %cmp, label %bb_end, label %bb_loop
+bb_end:
+ ret float 0.0
+}
+
+%struct.S = type { i32, i32, i32, i32, i32, i32 }
+
+; "foo_sret" is a function that takes a swifterror parameter, it also has a sret
+; parameter.
+define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: foo_sret:
+; CHECK-APPLE: mov [[SRET:x[0-9]+]], x8
+; CHECK-APPLE: orr w0, wzr, #0x10
+; CHECK-APPLE: malloc
+; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-APPLE: strb [[ID]], [x0, #8]
+; CHECK-APPLE: str w{{.*}}, [{{.*}}[[SRET]], #4]
+; CHECK-APPLE: mov x19, x0
+; CHECK-APPLE-NOT: x19
+
+; CHECK-O0-LABEL: foo_sret:
+; CHECK-O0: orr w{{.*}}, wzr, #0x10
+; spill x8
+; CHECK-O0-DAG: str x8
+; spill x19
+; CHECK-O0-DAG: str x19
+; CHECK-O0: malloc
+; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-O0: strb [[ID]], [x0, #8]
+; reload from stack
+; CHECK-O0: ldr [[SRET:x[0-9]+]]
+; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4]
+; CHECK-O0: mov x19
+; CHECK-O0-NOT: x19
+entry:
+ %call = call i8* @malloc(i64 16)
+ %call.0 = bitcast i8* %call to %swift_error*
+ store %swift_error* %call.0, %swift_error** %error_ptr_ref
+ %tmp = getelementptr inbounds i8, i8* %call, i64 8
+ store i8 1, i8* %tmp
+ %v2 = getelementptr inbounds %struct.S, %struct.S* %agg.result, i32 0, i32 1
+ store i32 %val1, i32* %v2
+ ret void
+}
+
+; "caller3" calls "foo_sret" that takes a swifterror parameter.
+define float @caller3(i8* %error_ref) {
+; CHECK-APPLE-LABEL: caller3:
+; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
+; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: bl {{.*}}foo_sret
+; CHECK-APPLE: cbnz x19
+; Access part of the error object and save it to error_ref
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: bl {{.*}}free
+
+; CHECK-O0-LABEL: caller3:
+; spill x0
+; CHECK-O0: str x0
+; CHECK-O0: mov x19
+; CHECK-O0: bl {{.*}}foo_sret
+; CHECK-O0: mov [[ID2:x[0-9]+]], x19
+; CHECK-O0: cbnz [[ID2]]
+; Access part of the error object and save it to error_ref
+; reload from stack
+; CHECK-O0: ldrb [[CODE:w[0-9]+]]
+; CHECK-O0: ldr [[ID:x[0-9]+]]
+; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK-O0: bl {{.*}}free
+entry:
+ %s = alloca %struct.S, align 8
+ %error_ptr_ref = alloca swifterror %swift_error*
+ store %swift_error* null, %swift_error** %error_ptr_ref
+ call void @foo_sret(%struct.S* sret %s, i32 1, %swift_error** swifterror %error_ptr_ref)
+ %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+ %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+ %tmp = bitcast %swift_error* %error_from_foo to i8*
+ br i1 %had_error_from_foo, label %handler, label %cont
+cont:
+ %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+ %t = load i8, i8* %v1
+ store i8 %t, i8* %error_ref
+ br label %handler
+handler:
+ call void @free(i8* %tmp)
+ ret float 1.0
+}
+
+; "foo_vararg" is a function that takes a swifterror parameter, it also has
+; variable number of arguments.
+declare void @llvm.va_start(i8*) nounwind
+define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
+; CHECK-APPLE-LABEL: foo_vararg:
+; CHECK-APPLE: orr w0, wzr, #0x10
+; CHECK-APPLE: malloc
+; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
+; CHECK-APPLE: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
+; CHECK-APPLE: strb [[ID]], [x0, #8]
+
+; First vararg
+; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
+; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
+; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8
+; Second vararg
+; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8
+; Third vararg
+; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+
+; CHECK-APPLE: mov x19, x0
+; CHECK-APPLE-NOT: x19
+entry:
+ %call = call i8* @malloc(i64 16)
+ %call.0 = bitcast i8* %call to %swift_error*
+ store %swift_error* %call.0, %swift_error** %error_ptr_ref
+ %tmp = getelementptr inbounds i8, i8* %call, i64 8
+ store i8 1, i8* %tmp
+
+ %args = alloca i8*, align 8
+ %a10 = alloca i32, align 4
+ %a11 = alloca i32, align 4
+ %a12 = alloca i32, align 4
+ %v10 = bitcast i8** %args to i8*
+ call void @llvm.va_start(i8* %v10)
+ %v11 = va_arg i8** %args, i32
+ store i32 %v11, i32* %a10, align 4
+ %v12 = va_arg i8** %args, i32
+ store i32 %v12, i32* %a11, align 4
+ %v13 = va_arg i8** %args, i32
+ store i32 %v13, i32* %a12, align 4
+
+ ret float 1.0
+}
+
+; "caller4" calls "foo_vararg" that takes a swifterror parameter.
+define float @caller4(i8* %error_ref) {
+; CHECK-APPLE-LABEL: caller4:
+
+; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
+; CHECK-APPLE: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
+; CHECK-APPLE: str {{x[0-9]+}}, [sp]
+
+; CHECK-APPLE: mov x19, xzr
+; CHECK-APPLE: bl {{.*}}foo_vararg
+; CHECK-APPLE: cbnz x19
+; Access part of the error object and save it to error_ref
+; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8]
+; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK-APPLE: mov x0, x19
+; CHECK-APPLE: bl {{.*}}free
+entry:
+ %error_ptr_ref = alloca swifterror %swift_error*
+ store %swift_error* null, %swift_error** %error_ptr_ref
+
+ %a10 = alloca i32, align 4
+ %a11 = alloca i32, align 4
+ %a12 = alloca i32, align 4
+ store i32 10, i32* %a10, align 4
+ store i32 11, i32* %a11, align 4
+ store i32 12, i32* %a12, align 4
+ %v10 = load i32, i32* %a10, align 4
+ %v11 = load i32, i32* %a11, align 4
+ %v12 = load i32, i32* %a12, align 4
+
+ %call = call float (%swift_error**, ...) @foo_vararg(%swift_error** swifterror %error_ptr_ref, i32 %v10, i32 %v11, i32 %v12)
+ %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+ %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+ %tmp = bitcast %swift_error* %error_from_foo to i8*
+ br i1 %had_error_from_foo, label %handler, label %cont
+
+cont:
+ %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+ %t = load i8, i8* %v1
+ store i8 %t, i8* %error_ref
+ br label %handler
+handler:
+ call void @free(i8* %tmp)
+ ret float 1.0
+}
diff --git a/test/CodeGen/AArch64/swiftself.ll b/test/CodeGen/AArch64/swiftself.ll
new file mode 100644
index 000000000000..a60aed6b0f2b
--- /dev/null
+++ b/test/CodeGen/AArch64/swiftself.ll
@@ -0,0 +1,67 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
+
+; Parameter with swiftself should be allocated to x20.
+; CHECK-LABEL: swiftself_param:
+; CHECK: mov x0, x20
+; CHECK-NEXT: ret
+define i8* @swiftself_param(i8* swiftself %addr0) {
+ ret i8 *%addr0
+}
+
+; Check that x20 is used to pass a swiftself argument.
+; CHECK-LABEL: call_swiftself:
+; CHECK: mov x20, x0
+; CHECK: bl {{_?}}swiftself_param
+; CHECK: ret
+define i8 *@call_swiftself(i8* %arg) {
+ %res = call i8 *@swiftself_param(i8* swiftself %arg)
+ ret i8 *%res
+}
+
+; x20 should be saved by the callee even if used for swiftself
+; CHECK-LABEL: swiftself_clobber:
+; CHECK: {{stp|str}} {{.*}}x20{{.*}}sp
+; ...
+; CHECK: {{ldp|ldr}} {{.*}}x20{{.*}}sp
+; CHECK: ret
+define i8 *@swiftself_clobber(i8* swiftself %addr0) {
+ call void asm sideeffect "", "~{x20}"()
+ ret i8 *%addr0
+}
+
+; Demonstrate that we do not need any movs when calling multiple functions
+; with swiftself argument.
+; CHECK-LABEL: swiftself_passthrough:
+; OPT-NOT: mov{{.*}}x20
+; OPT: bl {{_?}}swiftself_param
+; OPT-NOT: mov{{.*}}x20
+; OPT-NEXT: bl {{_?}}swiftself_param
+; OPT: ret
+define void @swiftself_passthrough(i8* swiftself %addr0) {
+ call i8 *@swiftself_param(i8* swiftself %addr0)
+ call i8 *@swiftself_param(i8* swiftself %addr0)
+ ret void
+}
+
+; We can use a tail call if the callee swiftself is the same as the caller one.
+; CHECK-LABEL: swiftself_tail:
+; OPT: b {{_?}}swiftself_param
+; OPT-NOT: ret
+define i8* @swiftself_tail(i8* swiftself %addr0) {
+ call void asm sideeffect "", "~{x20}"()
+ %res = tail call i8* @swiftself_param(i8* swiftself %addr0)
+ ret i8* %res
+}
+
+; We can not use a tail call if the callee swiftself is not the same as the
+; caller one.
+; CHECK-LABEL: swiftself_notail:
+; CHECK: mov x20, x0
+; CHECK: bl {{_?}}swiftself_param
+; CHECK: ret
+define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind {
+ %res = tail call i8* @swiftself_param(i8* swiftself %addr1)
+ ret i8* %res
+}
diff --git a/test/CodeGen/AArch64/tailcall-ccmismatch.ll b/test/CodeGen/AArch64/tailcall-ccmismatch.ll
new file mode 100644
index 000000000000..ab96e609dd46
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-ccmismatch.ll
@@ -0,0 +1,24 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple="aarch64--"
+
+declare void @somefunc()
+define preserve_mostcc void @test_ccmismatch_notail() {
+; Ensure that no tail call is used here, as the called function somefunc does
+; not preserve enough registers for preserve_mostcc.
+; CHECK-LABEL: test_ccmismatch_notail:
+; CHECK-NOT: b somefunc
+; CHECK: bl somefunc
+ tail call void @somefunc()
+ ret void
+}
+
+declare preserve_mostcc void @some_preserve_most_func()
+define void @test_ccmismatch_tail() {
+; We can perform a tail call here, because some_preserve_most_func preserves
+; all registers necessary for test_ccmismatch_tail.
+; CHECK-LABEL: test_ccmismatch_tail:
+; CHECK-NOT: bl some_preserve_most_func
+; CHECK: b some_preserve_most_func
+ tail call preserve_mostcc void @some_preserve_most_func()
+ ret void
+}
diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
index 5d6805998d22..3955877b09b7 100644
--- a/test/CodeGen/AArch64/tailcall-implicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -disable-post-ra -asm-verbose=false | FileCheck %s
; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll
index 343ffab57e35..59a3be905f17 100644
--- a/test/CodeGen/AArch64/tailcall_misched_graph.ll
+++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll
@@ -37,6 +37,8 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*,
; CHECK: SU({{.*}}): [[VRB]]<def> = LDRXui <fi#-2>
; CHECK-NOT: SU
; CHECK: Successors:
-; CHECK: ch SU([[DEPSTORE:.*]]): Latency=0
+; CHECK: ch SU([[DEPSTOREB:.*]]): Latency=0
+; CHECK: ch SU([[DEPSTOREA:.*]]): Latency=0
-; CHECK: SU([[DEPSTORE]]): STRXui %vreg0, <fi#-4>
+; CHECK: SU([[DEPSTOREA]]): STRXui %vreg{{.*}}, <fi#-4>
+; CHECK: SU([[DEPSTOREB]]): STRXui %vreg{{.*}}, <fi#-3>
diff --git a/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/test/CodeGen/AArch64/tailmerging_in_mbp.ll
new file mode 100644
index 000000000000..d850801ee54a
--- /dev/null
+++ b/test/CodeGen/AArch64/tailmerging_in_mbp.ll
@@ -0,0 +1,63 @@
+; RUN: llc <%s -march=aarch64 -verify-machine-dom-info | FileCheck %s
+
+; CHECK-LABEL: test:
+; CHECK: LBB0_7:
+; CHECK: b.hi
+; CHECK-NEXT: b
+; CHECK-NEXT: LBB0_8:
+; CHECK-NEXT: mov x8, x9
+; CHECK-NEXT: LBB0_9:
+define i64 @test(i64 %n, i64* %a, i64* %b, i64* %c, i64* %d, i64* %e, i64* %f) {
+entry:
+ %cmp28 = icmp sgt i64 %n, 1
+ br i1 %cmp28, label %for.body, label %for.end
+
+for.body: ; preds = %for.body.lr.ph, %if.end
+ %j = phi i64 [ %n, %entry ], [ %div, %if.end ]
+ %div = lshr i64 %j, 1
+ %a.arrayidx = getelementptr inbounds i64, i64* %a, i64 %div
+ %a.j = load i64, i64* %a.arrayidx
+ %b.arrayidx = getelementptr inbounds i64, i64* %b, i64 %div
+ %b.j = load i64, i64* %b.arrayidx
+ %cmp.i = icmp slt i64 %a.j, %b.j
+ br i1 %cmp.i, label %for.end.loopexit, label %cond.false.i
+
+cond.false.i: ; preds = %for.body
+ %cmp4.i = icmp sgt i64 %a.j, %b.j
+ br i1 %cmp4.i, label %if.end, label %cond.false6.i
+
+cond.false6.i: ; preds = %cond.false.i
+ %c.arrayidx = getelementptr inbounds i64, i64* %c, i64 %div
+ %c.j = load i64, i64* %c.arrayidx
+ %d.arrayidx = getelementptr inbounds i64, i64* %d, i64 %div
+ %d.j = load i64, i64* %d.arrayidx
+ %cmp9.i = icmp slt i64 %c.j, %d.j
+ br i1 %cmp9.i, label %for.end.loopexit, label %cond.false11.i
+
+cond.false11.i: ; preds = %cond.false6.i
+ %cmp14.i = icmp sgt i64 %c.j, %d.j
+ br i1 %cmp14.i, label %if.end, label %cond.false12.i
+
+cond.false12.i: ; preds = %cond.false11.i
+ %e.arrayidx = getelementptr inbounds i64, i64* %e, i64 %div
+ %e.j = load i64, i64* %e.arrayidx
+ %f.arrayidx = getelementptr inbounds i64, i64* %f, i64 %div
+ %f.j = load i64, i64* %f.arrayidx
+ %cmp19.i = icmp sgt i64 %e.j, %f.j
+ br i1 %cmp19.i, label %if.end, label %for.end.loopexit
+
+if.end: ; preds = %cond.false12.i, %cond.false11.i, %cond.false.i
+ %cmp = icmp ugt i64 %j, 3
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %cond.false12.i, %cond.false6.i, %for.body, %if.end
+ %j.0.lcssa.ph = phi i64 [ %j, %cond.false12.i ], [ %j, %cond.false6.i ], [ %j, %for.body ], [ %div, %if.end ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %j.0.lcssa = phi i64 [ %n, %entry ], [ %j.0.lcssa.ph, %for.end.loopexit ]
+ %j.2 = add i64 %j.0.lcssa, %n
+ %j.3 = mul i64 %j.2, %n
+ %j.4 = add i64 %j.3, 10
+ ret i64 %j.4
+}
diff --git a/test/CodeGen/AArch64/vcvt-oversize.ll b/test/CodeGen/AArch64/vcvt-oversize.ll
index 066a4b666204..b6e25cfadaa9 100644
--- a/test/CodeGen/AArch64/vcvt-oversize.ll
+++ b/test/CodeGen/AArch64/vcvt-oversize.ll
@@ -2,8 +2,9 @@
define <8 x i8> @float_to_i8(<8 x float>* %in) {
; CHECK-LABEL: float_to_i8:
-; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
-; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
+; CHECK: ldp q1, q0, [x0]
+; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v1.4s, v1.4s
+; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v0.4s, v0.4s
; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
diff --git a/test/CodeGen/AArch64/vector-fcopysign.ll b/test/CodeGen/AArch64/vector-fcopysign.ll
index 865a0a5b8580..47d75d5ecc61 100644
--- a/test/CodeGen/AArch64/vector-fcopysign.ll
+++ b/test/CodeGen/AArch64/vector-fcopysign.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; WidenVecRes same
define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v1f32_v1f32:
-; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.2s v2, #128, lsl #24
; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
@@ -18,7 +18,7 @@ define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0
define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v1f32_v1f64:
; CHECK-NEXT: fcvt s1, d1
-; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.4s v2, #128, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
%tmp0 = fptrunc <1 x double> %b to <1 x float>
@@ -59,7 +59,7 @@ declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
-; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.2s v2, #128, lsl #24
; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
@@ -69,7 +69,7 @@ define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0
define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK-NEXT: fcvtn v1.2s, v1.2d
-; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.2s v2, #128, lsl #24
; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%tmp0 = fptrunc <2 x double> %b to <2 x float>
@@ -83,7 +83,7 @@ declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
-; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: movi.4s v2, #128, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
%r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
@@ -94,21 +94,21 @@ define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0
define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f64:
; CHECK-NEXT: mov s3, v0[1]
-; CHECK-NEXT: mov d4, v1[1]
-; CHECK-NEXT: movi.4s v5, #0x80, lsl #24
-; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: movi.4s v4, #128, lsl #24
+; CHECK-NEXT: fcvt s5, d1
; CHECK-NEXT: mov s6, v0[2]
; CHECK-NEXT: mov s7, v0[3]
-; CHECK-NEXT: fcvt s16, d2
-; CHECK-NEXT: bit.16b v0, v1, v5
-; CHECK-NEXT: bit.16b v6, v16, v5
-; CHECK-NEXT: fcvt s1, d4
-; CHECK-NEXT: bit.16b v3, v1, v5
+; CHECK-NEXT: bit.16b v0, v5, v4
+; CHECK-NEXT: fcvt s5, d2
+; CHECK-NEXT: bit.16b v6, v5, v4
+; CHECK-NEXT: mov d1, v1[1]
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: bit.16b v3, v1, v4
; CHECK-NEXT: mov d1, v2[1]
; CHECK-NEXT: fcvt s1, d1
; CHECK-NEXT: ins.s v0[1], v3[0]
; CHECK-NEXT: ins.s v0[2], v6[0]
-; CHECK-NEXT: bit.16b v7, v1, v5
+; CHECK-NEXT: bit.16b v7, v1, v4
; CHECK-NEXT: ins.s v0[3], v7[0]
; CHECK-NEXT: ret
%tmp0 = fptrunc <4 x double> %b to <4 x float>
diff --git a/test/CodeGen/AArch64/vector_merge_dep_check.ll b/test/CodeGen/AArch64/vector_merge_dep_check.ll
new file mode 100644
index 000000000000..9220947e8362
--- /dev/null
+++ b/test/CodeGen/AArch64/vector_merge_dep_check.ll
@@ -0,0 +1,41 @@
+; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s
+; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s
+
+; This test checks that we do not merge stores together which have
+; dependencies through their non-chain operands (e.g. one store is the
+; chain ancestor of a load whose value is used as the data for the
+; other store). Merging in such cases creates a loop in the DAG.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+%"class.std::__1::complex.0.20.56.60.64.72.76.88.92.112.140.248" = type { float, float }
+
+; Function Attrs: noinline norecurse nounwind ssp uwtable
+define void @fn(<2 x i64>* %argA, <2 x i64>* %argB, i64* %a) #0 align 2 {
+ %_p_vec_full = load <2 x i64>, <2 x i64>* %argA, align 4, !alias.scope !1, !noalias !3
+ %x = extractelement <2 x i64> %_p_vec_full, i32 1
+ store i64 %x, i64* %a, align 8, !alias.scope !4, !noalias !9
+ %_p_vec_full155 = load <2 x i64>, <2 x i64>* %argB, align 4, !alias.scope !1, !noalias !3
+ %y = extractelement <2 x i64> %_p_vec_full155, i32 0
+ %scevgep41 = getelementptr i64, i64* %a, i64 -1
+ store i64 %y, i64* %scevgep41, align 8, !alias.scope !4, !noalias !9
+ ret void
+}
+
+; CHECK: ret
+
+attributes #0 = { noinline norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Snapdragon LLVM ARM Compiler 3.8.0 (based on LLVM 3.8.0)"}
+!1 = distinct !{!1, !2, !"polly.alias.scope.rhs"}
+!2 = distinct !{!2, !"polly.alias.scope.domain"}
+!3 = !{!4, !5, !6, !7, !8}
+!4 = distinct !{!4, !2, !"polly.alias.scope.blockB"}
+!5 = distinct !{!5, !2, !"polly.alias.scope.add28.lcssa.reg2mem"}
+!6 = distinct !{!6, !2, !"polly.alias.scope.count.0.lcssa.reg2mem"}
+!7 = distinct !{!7, !2, !"polly.alias.scope.mul"}
+!8 = distinct !{!8, !2, !"polly.alias.scope.add28.us.lcssa.reg2mem"}
+!9 = !{!1, !5, !6, !7, !8}