diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
| commit | eb11fae6d08f479c0799db45860a98af528fa6e7 (patch) | |
| tree | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /test/CodeGen/AArch64 | |
| parent | b8a2042aa938069e862750553db0e4d82d25822c (diff) | |
Notes
Diffstat (limited to 'test/CodeGen/AArch64')
374 files changed, 26167 insertions, 5580 deletions
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll index 40f65b3774ed..256bcf28e66c 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll @@ -13,12 +13,15 @@ target triple = "aarch64-apple-ios9.0" ; CHECK: [[F_ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK: [[TWO:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 -; CHECK: %w0 = COPY [[ANSWER]] -; CHECK: %d0 = COPY [[D_ONE]] -; CHECK: %x1 = COPY [[TWELVE]] -; CHECK: G_STORE [[THREE]](s8), {{%[0-9]+}}(p0) :: (store 1 into stack, align 0) -; CHECK: G_STORE [[ONE]](s16), {{%[0-9]+}}(p0) :: (store 2 into stack + 8, align 0) -; CHECK: G_STORE [[FOUR]](s32), {{%[0-9]+}}(p0) :: (store 4 into stack + 16, align 0) +; CHECK: $w0 = COPY [[ANSWER]] +; CHECK: $d0 = COPY [[D_ONE]] +; CHECK: $x1 = COPY [[TWELVE]] +; CHECK: [[THREE_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[THREE]] +; CHECK: G_STORE [[THREE_EXT]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack, align 0) +; CHECK: [[ONE_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ONE]] +; CHECK: G_STORE [[ONE_EXT]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack + 8, align 0) +; CHECK: [[FOUR_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FOUR]] +; CHECK: G_STORE [[FOUR_EXT]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack + 16, align 0) ; CHECK: G_STORE [[F_ONE]](s32), {{%[0-9]+}}(p0) :: (store 4 into stack + 24, align 0) ; CHECK: G_STORE [[TWO]](s64), {{%[0-9]+}}(p0) :: (store 8 into stack + 32, align 0) declare void @varargs(i32, double, i64, ...) 
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll index 3888628fd1ed..6bdcc4801ff2 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll @@ -4,15 +4,15 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linux-gnu" ; CHECK-LABEL: name: args_i32 -; CHECK: %[[ARG0:[0-9]+]]:_(s32) = COPY %w0 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w1 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w2 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w3 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w4 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w5 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w6 -; CHECK: %{{[0-9]+}}:_(s32) = COPY %w7 -; CHECK: %w0 = COPY %[[ARG0]] +; CHECK: %[[ARG0:[0-9]+]]:_(s32) = COPY $w0 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w1 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w2 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w3 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w4 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w5 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w6 +; CHECK: %{{[0-9]+}}:_(s32) = COPY $w7 +; CHECK: $w0 = COPY %[[ARG0]] define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3, i32 %w4, i32 %w5, i32 %w6, i32 %w7) { @@ -20,15 +20,15 @@ define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3, } ; CHECK-LABEL: name: args_i64 -; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY %x0 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x1 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x2 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x3 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x4 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x5 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x6 -; CHECK: %{{[0-9]+}}:_(s64) = COPY %x7 -; CHECK: %x0 = COPY %[[ARG0]] +; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY $x0 +; CHECK: %{{[0-9]+}}:_(s64) = COPY $x1 +; CHECK: %{{[0-9]+}}:_(s64) = COPY $x2 +; CHECK: %{{[0-9]+}}:_(s64) = COPY $x3 +; CHECK: %{{[0-9]+}}:_(s64) = COPY $x4 +; CHECK: %{{[0-9]+}}:_(s64) = COPY $x5 +; CHECK: %{{[0-9]+}}:_(s64) = COPY $x6 +; CHECK: 
%{{[0-9]+}}:_(s64) = COPY $x7 +; CHECK: $x0 = COPY %[[ARG0]] define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7) { ret i64 %x0 @@ -36,23 +36,23 @@ define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3, ; CHECK-LABEL: name: args_ptrs -; CHECK: %[[ARG0:[0-9]+]]:_(p0) = COPY %x0 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x1 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x2 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x3 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x4 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x5 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x6 -; CHECK: %{{[0-9]+}}:_(p0) = COPY %x7 -; CHECK: %x0 = COPY %[[ARG0]] +; CHECK: %[[ARG0:[0-9]+]]:_(p0) = COPY $x0 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x1 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x2 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x3 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x4 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x5 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x6 +; CHECK: %{{[0-9]+}}:_(p0) = COPY $x7 +; CHECK: $x0 = COPY %[[ARG0]] define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3, [3 x float]* %x4, double* %x5, i8* %x6, i8* %x7) { ret i8* %x0 } ; CHECK-LABEL: name: args_arr -; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY %d0 -; CHECK: %d0 = COPY %[[ARG0]] +; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY $d0 +; CHECK: $d0 = COPY %[[ARG0]] define [1 x double] @args_arr([1 x double] %d0) { ret [1 x double] %d0 } @@ -67,16 +67,16 @@ define [1 x double] @args_arr([1 x double] %d0) { ; CHECK: [[F_ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK: [[TWO:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 -; CHECK: %w0 = COPY [[ANSWER]] -; CHECK: %d0 = COPY [[D_ONE]] -; CHECK: %x1 = COPY [[TWELVE]] +; CHECK: $w0 = COPY [[ANSWER]] +; CHECK: $d0 = COPY [[D_ONE]] +; CHECK: $x1 = COPY [[TWELVE]] ; CHECK: [[THREE_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[THREE]] -; CHECK: %w2 = COPY [[THREE_TMP]](s32) +; CHECK: $w2 = COPY [[THREE_TMP]](s32) ; CHECK: [[ONE_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[ONE]] -; CHECK: %w3 = COPY [[ONE_TMP]](s32) -; 
CHECK: %w4 = COPY [[FOUR]](s32) -; CHECK: %s1 = COPY [[F_ONE]](s32) -; CHECK: %d2 = COPY [[TWO]](s64) +; CHECK: $w3 = COPY [[ONE_TMP]](s32) +; CHECK: $w4 = COPY [[FOUR]](s32) +; CHECK: $s1 = COPY [[F_ONE]](s32) +; CHECK: $d2 = COPY [[TWO]](s64) declare void @varargs(i32, double, i64, ...) define void @test_varargs() { call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0) diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 111aaf88b160..9c9d22d8ff6c 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -1,4 +1,4 @@ -; RUN: not llc -O0 -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: not llc -O0 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR ; RUN: llc -O0 -global-isel -global-isel-abort=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=FALLBACK ; RUN: llc -O0 -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o %t.out 2> %t.err ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out @@ -32,21 +32,10 @@ define i128 @ABIi128(i128 %arg1) { ret i128 %res } -; It happens that we don't handle ConstantArray instances yet during -; translation. Any other constant would be fine too. - -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate constant: [1 x double] (in function: constant) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for constant -; FALLBACK-WITH-REPORT-OUT-LABEL: constant: -; FALLBACK-WITH-REPORT-OUT: fmov d0, #1.0 -define [1 x double] @constant() { - ret [1 x double] [double 1.0] -} - ; The key problem here is that we may fail to create an MBB referenced by a ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things ; happen. 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %6(s32), %2(p0); mem:ST4[%addr] GPR:%6,%2 (in function: pending_phis) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %6:gpr(s32), %2:gpr(p0) :: (store seq_cst 4 into %ir.addr) (in function: pending_phis) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis ; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis: define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) { @@ -65,8 +54,18 @@ false: } +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s24) = G_LOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type_load +; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type_load +define i32 @odd_type_load() { +entry: + %ld = load i24, i24* undef, align 1 + %cst = zext i24 %ld to i32 + ret i32 %cst +} + ; General legalizer inability to handle types whose size wasn't a power of 2. 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1(s42), %0(p0); mem:ST6[%addr](align=8) (in function: odd_type) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s42), %0:_(p0) :: (store 6 into %ir.addr, align 8) (in function: odd_type) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type: define void @odd_type(i42* %addr) { @@ -75,7 +74,7 @@ define void @odd_type(i42* %addr) { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1(<7 x s32>), %0(p0); mem:ST28[%addr](align=32) (in function: odd_vector) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: define void @odd_vector(<7 x i32>* %addr) { @@ -94,7 +93,7 @@ define i128 @sequence_sizes([8 x i8] %in) { } ; Just to make sure we don't accidentally emit a normal load/store. 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %2:gpr(s64) = G_LOAD %0(p0); mem:LD8[%addr] GPR:%2,%0 (in function: atomic_ops) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %2:gpr(s64) = G_LOAD %0:gpr(p0) :: (load seq_cst 8 from %ir.addr) (in function: atomic_ops) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for atomic_ops ; FALLBACK-WITH-REPORT-LABEL: atomic_ops: define i64 @atomic_ops(i64* %addr) { @@ -142,7 +141,7 @@ define fp128 @test_quad_dump() { ret fp128 0xL00000000000000004000000000000000 } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(p0) = G_EXTRACT_VECTOR_ELT %1(<2 x p0>), %2(s32); (in function: vector_of_pointers_extractelement) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(p0) = G_EXTRACT_VECTOR_ELT %1:_(<2 x p0>), %2:_(s32) (in function: vector_of_pointers_extractelement) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement: @var = global <2 x i16*> zeroinitializer @@ -159,7 +158,7 @@ end: br label %block } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0(<2 x p0>), %4(p0); mem:ST16[undef] (in function: vector_of_pointers_insertelement) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0:_(<2 x p0>), %5:_(p0) :: (store 16 into `<2 x i16*>* undef`) (in function: vector_of_pointers_insertelement) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement: define void @vector_of_pointers_insertelement() { @@ -175,17 +174,7 @@ end: br label %block } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1(s96), %3(p0); 
mem:ST12[undef](align=4) (in function: nonpow2_insertvalue_narrowing) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_insertvalue_narrowing -; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_insertvalue_narrowing: -%struct96 = type { float, float, float } -define void @nonpow2_insertvalue_narrowing(float %a) { - %dummy = insertvalue %struct96 undef, float %a, 0 - store %struct96 %dummy, %struct96* undef - ret void -} - -; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4; mem:ST12[undef](align=16) (in function: nonpow2_add_narrowing) +; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4 :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_add_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing: define void @nonpow2_add_narrowing() { @@ -196,7 +185,7 @@ define void @nonpow2_add_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4; mem:ST12[undef](align=16) (in function: nonpow2_add_narrowing) +; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4 :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_add_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_or_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_or_narrowing: define void @nonpow2_or_narrowing() { @@ -207,7 +196,7 @@ define void @nonpow2_or_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0, %1; mem:ST12[undef](align=16) (in function: nonpow2_load_narrowing) +; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0, %1 :: (store 12 into `i96* undef`, align 16) (in function: 
nonpow2_load_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_load_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_load_narrowing: define void @nonpow2_load_narrowing() { @@ -216,7 +205,7 @@ define void @nonpow2_load_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3(s96), %0(p0); mem:ST12[%c](align=16) (in function: nonpow2_store_narrowing +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3:_(s96), %0:_(p0) :: (store 12 into %ir.c, align 16) (in function: nonpow2_store_narrowing ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing: define void @nonpow2_store_narrowing(i96* %c) { @@ -226,7 +215,7 @@ define void @nonpow2_store_narrowing(i96* %c) { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0(s96), %1(p0); mem:ST12[undef](align=16) (in function: nonpow2_constant_narrowing) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0:_(s96), %1:_(p0) :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_constant_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_constant_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_constant_narrowing: define void @nonpow2_constant_narrowing() { @@ -236,8 +225,8 @@ define void @nonpow2_constant_narrowing() { ; Currently can't handle vector lengths that aren't an exact multiple of ; natively supported vector lengths. Test that the fall-back works for those. 
-; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1(<7 x s64>) = G_ADD %0, %0; (in function: nonpow2_vector_add_fewerelements -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s64) = G_EXTRACT_VECTOR_ELT %1(<7 x s64>), %3(s64); (in function: nonpow2_vector_add_fewerelements) +; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s64) = G_EXTRACT_VECTOR_ELT %1:_(<7 x s64>), %3:_(s64) (in function: nonpow2_vector_add_fewerelements) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements: define void @nonpow2_vector_add_fewerelements() { @@ -246,3 +235,26 @@ define void @nonpow2_vector_add_fewerelements() { store i64 %ex, i64* undef ret void } + +%swift_error = type {i64, i8} + +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower arguments due to swifterror/swiftself: void (%swift_error**)* (in function: swifterror_param) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for swifterror_param +define void @swifterror_param(%swift_error** swifterror %error_ptr_ref) { + ret void +} + +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: alloca: ' %error_ptr_ref = alloca swifterror %swift_error*' (in function: swifterror_alloca) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for swifterror_alloca +; We can't currently test the call parameters being swifterror 
because the value +; must come from a swifterror alloca or parameter, at which point we already +; fallback. As long as those cases work however we should be fine. +define void @swifterror_alloca(i8* %error_ref) { +entry: + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + call void @swifterror_param(%swift_error** swifterror %error_ptr_ref) + ret void +} + + diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-fmuladd.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-fmuladd.ll new file mode 100644 index 000000000000..371255a11dea --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-fmuladd.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -o - -verify-machineinstrs -global-isel -stop-after=irtranslator -fp-contract=fast %s | FileCheck %s --check-prefix=FPFAST +; RUN: llc -o - -verify-machineinstrs -global-isel -stop-after=irtranslator -fp-contract=off %s | FileCheck %s --check-prefix=FPOFF +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--" + +define float @test_fmuladd(float %x, float %y, float %z) { + ; FPFAST-LABEL: name: test_fmuladd + ; FPFAST: bb.1 (%ir-block.0): + ; FPFAST: liveins: $s0, $s1, $s2 + ; FPFAST: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; FPFAST: [[COPY1:%[0-9]+]]:_(s32) = COPY $s1 + ; FPFAST: [[COPY2:%[0-9]+]]:_(s32) = COPY $s2 + ; FPFAST: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; FPFAST: $s0 = COPY [[FMA]](s32) + ; FPFAST: RET_ReallyLR implicit $s0 + ; FPOFF-LABEL: name: test_fmuladd + ; FPOFF: bb.1 (%ir-block.0): + ; FPOFF: liveins: $s0, $s1, $s2 + ; FPOFF: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; FPOFF: [[COPY1:%[0-9]+]]:_(s32) = COPY $s1 + ; FPOFF: [[COPY2:%[0-9]+]]:_(s32) = COPY $s2 + ; FPOFF: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; FPOFF: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; FPOFF: $s0 = COPY 
[[FADD]](s32) + ; FPOFF: RET_ReallyLR implicit $s0 + %res = call float @llvm.fmuladd.f32(float %x, float %y, float %z) + ret float %res +} + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.fmuladd.f32(float, float, float) #0 + +attributes #0 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index e78683279754..65c6a4f90c70 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -7,22 +7,22 @@ target triple = "aarch64--" ; Tests for add. ; CHECK-LABEL: name: addi64 -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_ADD [[ARG1]], [[ARG2]] -; CHECK-NEXT: %x0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %x0 +; CHECK-NEXT: $x0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $x0 define i64 @addi64(i64 %arg1, i64 %arg2) { %res = add i64 %arg1, %arg2 ret i64 %res } ; CHECK-LABEL: name: muli64 -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_MUL [[ARG1]], [[ARG2]] -; CHECK-NEXT: %x0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %x0 +; CHECK-NEXT: $x0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $x0 define i64 @muli64(i64 %arg1, i64 %arg2) { %res = mul i64 %arg1, %arg2 ret i64 %res @@ -33,13 +33,13 @@ define i64 @muli64(i64 %arg1, i64 %arg2) { ; CHECK: stack: ; CHECK-NEXT: - { id: 0, name: ptr1, type: default, offset: 0, size: 8, alignment: 8, ; CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, -; CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } 
+; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; CHECK-NEXT: - { id: 1, name: ptr2, type: default, offset: 0, size: 8, alignment: 1, ; CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, -; CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; CHECK-NEXT: - { id: 2, name: ptr3, type: default, offset: 0, size: 128, alignment: 8, ; CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, -; CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; CHECK-NEXT: - { id: 3, name: ptr4, type: default, offset: 0, size: 1, alignment: 8, ; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.0.ptr1 ; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.1.ptr2 @@ -107,7 +107,7 @@ end: ; CHECK-NEXT: successors: %[[TRUE:bb.[0-9]+]](0x40000000), ; CHECK: %[[FALSE:bb.[0-9]+]](0x40000000) ; -; CHECK: [[ADDR:%.*]]:_(p0) = COPY %x0 +; CHECK: [[ADDR:%.*]]:_(p0) = COPY $x0 ; ; Check that we emit the correct branch. 
; CHECK: [[TST:%.*]]:_(s1) = G_LOAD [[ADDR]](p0) @@ -135,7 +135,7 @@ false: ; ; CHECK: bb.{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: successors: %[[BB_CASE100:bb.[0-9]+]](0x40000000), %[[BB_NOTCASE100_CHECKNEXT:bb.[0-9]+]](0x40000000) -; CHECK: %0:_(s32) = COPY %w0 +; CHECK: %0:_(s32) = COPY $w0 ; CHECK: %[[reg100:[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK: %[[reg200:[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK: %[[reg0:[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -171,8 +171,8 @@ false: ; ; CHECK: [[BB_RET]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: %[[regret:[0-9]+]]:_(s32) = G_PHI %[[regretdefault]](s32), %[[BB_DEFAULT]], %[[regretc100]](s32), %[[BB_CASE100]] -; CHECK: %w0 = COPY %[[regret]](s32) -; CHECK: RET_ReallyLR implicit %w0 +; CHECK: $w0 = COPY %[[regret]](s32) +; CHECK: RET_ReallyLR implicit $w0 ; define i32 @switch(i32 %argc) { entry: @@ -289,22 +289,22 @@ L2: ; preds = %L1 ; Tests for or. ; CHECK-LABEL: name: ori64 -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_OR [[ARG1]], [[ARG2]] -; CHECK-NEXT: %x0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %x0 +; CHECK-NEXT: $x0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $x0 define i64 @ori64(i64 %arg1, i64 %arg2) { %res = or i64 %arg1, %arg2 ret i64 %res } ; CHECK-LABEL: name: ori32 -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_OR [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @ori32(i32 %arg1, i32 %arg2) { %res = or i32 %arg1, %arg2 ret i32 %res @@ -312,22 +312,22 @@ define i32 @ori32(i32 %arg1, i32 %arg2) { ; Tests for xor. 
; CHECK-LABEL: name: xori64 -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_XOR [[ARG1]], [[ARG2]] -; CHECK-NEXT: %x0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %x0 +; CHECK-NEXT: $x0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $x0 define i64 @xori64(i64 %arg1, i64 %arg2) { %res = xor i64 %arg1, %arg2 ret i64 %res } ; CHECK-LABEL: name: xori32 -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_XOR [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @xori32(i32 %arg1, i32 %arg2) { %res = xor i32 %arg1, %arg2 ret i32 %res @@ -335,22 +335,22 @@ define i32 @xori32(i32 %arg1, i32 %arg2) { ; Tests for and. 
; CHECK-LABEL: name: andi64 -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_AND [[ARG1]], [[ARG2]] -; CHECK-NEXT: %x0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %x0 +; CHECK-NEXT: $x0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $x0 define i64 @andi64(i64 %arg1, i64 %arg2) { %res = and i64 %arg1, %arg2 ret i64 %res } ; CHECK-LABEL: name: andi32 -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_AND [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @andi32(i32 %arg1, i32 %arg2) { %res = and i32 %arg1, %arg2 ret i32 %res @@ -358,58 +358,58 @@ define i32 @andi32(i32 %arg1, i32 %arg2) { ; Tests for sub. 
; CHECK-LABEL: name: subi64 -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_SUB [[ARG1]], [[ARG2]] -; CHECK-NEXT: %x0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %x0 +; CHECK-NEXT: $x0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $x0 define i64 @subi64(i64 %arg1, i64 %arg2) { %res = sub i64 %arg1, %arg2 ret i64 %res } ; CHECK-LABEL: name: subi32 -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SUB [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @subi32(i32 %arg1, i32 %arg2) { %res = sub i32 %arg1, %arg2 ret i32 %res } ; CHECK-LABEL: name: ptrtoint -; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[ARG1]] -; CHECK: %x0 = COPY [[RES]] -; CHECK: RET_ReallyLR implicit %x0 +; CHECK: $x0 = COPY [[RES]] +; CHECK: RET_ReallyLR implicit $x0 define i64 @ptrtoint(i64* %a) { %val = ptrtoint i64* %a to i64 ret i64 %val } ; CHECK-LABEL: name: inttoptr -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[RES:%[0-9]+]]:_(p0) = G_INTTOPTR [[ARG1]] -; CHECK: %x0 = COPY [[RES]] -; CHECK: RET_ReallyLR implicit %x0 +; CHECK: $x0 = COPY [[RES]] +; CHECK: RET_ReallyLR implicit $x0 define i64* @inttoptr(i64 %a) { %val = inttoptr i64 %a to i64* ret i64* %val } ; CHECK-LABEL: name: trivial_bitcast -; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: %x0 = COPY [[ARG1]] -; CHECK: RET_ReallyLR implicit %x0 +; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: 
$x0 = COPY [[ARG1]] +; CHECK: RET_ReallyLR implicit $x0 define i64* @trivial_bitcast(i8* %a) { %val = bitcast i8* %a to i64* ret i64* %val } ; CHECK-LABEL: name: trivial_bitcast_with_copy -; CHECK: [[A:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[A:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: G_BR %[[CAST:bb\.[0-9]+]] ; CHECK: [[END:bb\.[0-9]+]].{{[a-zA-Z0-9.]+}}: @@ -429,19 +429,31 @@ cast: } ; CHECK-LABEL: name: bitcast -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[RES1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[ARG1]] ; CHECK: [[RES2:%[0-9]+]]:_(s64) = G_BITCAST [[RES1]] -; CHECK: %x0 = COPY [[RES2]] -; CHECK: RET_ReallyLR implicit %x0 +; CHECK: $x0 = COPY [[RES2]] +; CHECK: RET_ReallyLR implicit $x0 define i64 @bitcast(i64 %a) { %res1 = bitcast i64 %a to <2 x i32> %res2 = bitcast <2 x i32> %res1 to i64 ret i64 %res2 } +; CHECK-LABEL: name: addrspacecast +; CHECK: [[ARG1:%[0-9]+]]:_(p1) = COPY $x0 +; CHECK: [[RES1:%[0-9]+]]:_(p2) = G_ADDRSPACE_CAST [[ARG1]] +; CHECK: [[RES2:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[RES1]] +; CHECK: $x0 = COPY [[RES2]] +; CHECK: RET_ReallyLR implicit $x0 +define i64* @addrspacecast(i32 addrspace(1)* %a) { + %res1 = addrspacecast i32 addrspace(1)* %a to i64 addrspace(2)* + %res2 = addrspacecast i64 addrspace(2)* %res1 to i64* + ret i64* %res2 +} + ; CHECK-LABEL: name: trunc -; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_LOAD ; CHECK: [[RES1:%[0-9]+]]:_(s8) = G_TRUNC [[ARG1]] ; CHECK: [[RES2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[VEC]] @@ -454,15 +466,15 @@ define void @trunc(i64 %a) { } ; CHECK-LABEL: name: load -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY %x1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY $x1 ; CHECK: [[VAL1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 16) -; CHECK: [[VAL2:%[0-9]+]]:_(s64) = G_LOAD 
[[ADDR42]](p42) :: (load 8 from %ir.addr42) +; CHECK: [[VAL2:%[0-9]+]]:_(s64) = G_LOAD [[ADDR42]](p42) :: (load 8 from %ir.addr42, addrspace 42) ; CHECK: [[SUM2:%.*]]:_(s64) = G_ADD [[VAL1]], [[VAL2]] ; CHECK: [[VAL3:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (volatile load 8 from %ir.addr) ; CHECK: [[SUM3:%[0-9]+]]:_(s64) = G_ADD [[SUM2]], [[VAL3]] -; CHECK: %x0 = COPY [[SUM3]] -; CHECK: RET_ReallyLR implicit %x0 +; CHECK: $x0 = COPY [[SUM3]] +; CHECK: RET_ReallyLR implicit $x0 define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) { %val1 = load i64, i64* %addr, align 16 @@ -475,12 +487,12 @@ define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) { } ; CHECK-LABEL: name: store -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY %x1 -; CHECK: [[VAL1:%[0-9]+]]:_(s64) = COPY %x2 -; CHECK: [[VAL2:%[0-9]+]]:_(s64) = COPY %x3 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY $x1 +; CHECK: [[VAL1:%[0-9]+]]:_(s64) = COPY $x2 +; CHECK: [[VAL2:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 16) -; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store 8 into %ir.addr42) +; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store 8 into %ir.addr42, addrspace 42) ; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store 8 into %ir.addr) ; CHECK: RET_ReallyLR define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) { @@ -492,8 +504,8 @@ define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) } ; CHECK-LABEL: name: intrinsics -; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[CREG:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]] ; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX 
%stack.0.ptr.vec @@ -522,7 +534,7 @@ define void @intrinsics(i32 %cur, i32 %bits) { ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_LOAD ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]] -; CHECK: %w0 = COPY [[RES]] +; CHECK: $w0 = COPY [[RES]] define i32 @test_phi(i32* %addr1, i32* %addr2, i1 %tst) { br i1 %tst, label %true, label %false @@ -551,14 +563,14 @@ define void @unreachable(i32 %a) { ; It's important that constants are after argument passing, but before the ; rest of the entry block. ; CHECK-LABEL: name: constant_int -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}: ; CHECK: [[SUM1:%[0-9]+]]:_(s32) = G_ADD [[IN]], [[ONE]] ; CHECK: [[SUM2:%[0-9]+]]:_(s32) = G_ADD [[IN]], [[ONE]] ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ADD [[SUM1]], [[SUM2]] -; CHECK: %w0 = COPY [[RES]] +; CHECK: $w0 = COPY [[RES]] define i32 @constant_int(i32 %in) { br label %next @@ -581,7 +593,7 @@ define i32 @constant_int_start() { ; CHECK-LABEL: name: test_undef ; CHECK: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF -; CHECK: %w0 = COPY [[UNDEF]] +; CHECK: $w0 = COPY [[UNDEF]] define i32 @test_undef() { ret i32 undef } @@ -589,7 +601,7 @@ define i32 @test_undef() { ; CHECK-LABEL: name: test_constant_inttoptr ; CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ONE]] -; CHECK: %x0 = COPY [[PTR]] +; CHECK: $x0 = COPY [[PTR]] define i8* @test_constant_inttoptr() { ret i8* inttoptr(i64 1 to i8*) } @@ -598,35 +610,35 @@ define i8* @test_constant_inttoptr() { ; functions, so reuse the "i64 1" from above. 
; CHECK-LABEL: name: test_reused_constant ; CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 -; CHECK: %x0 = COPY [[ONE]] +; CHECK: $x0 = COPY [[ONE]] define i64 @test_reused_constant() { ret i64 1 } ; CHECK-LABEL: name: test_sext -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_SEXT [[IN]] -; CHECK: %x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[RES]] define i64 @test_sext(i32 %in) { %res = sext i32 %in to i64 ret i64 %res } ; CHECK-LABEL: name: test_zext -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ZEXT [[IN]] -; CHECK: %x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[RES]] define i64 @test_zext(i32 %in) { %res = zext i32 %in to i64 ret i64 %res } ; CHECK-LABEL: name: test_shl -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SHL [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_shl(i32 %arg1, i32 %arg2) { %res = shl i32 %arg1, %arg2 ret i32 %res @@ -634,82 +646,89 @@ define i32 @test_shl(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: name: test_lshr -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_LSHR [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_lshr(i32 %arg1, i32 %arg2) { %res = lshr i32 %arg1, %arg2 ret i32 %res } ; CHECK-LABEL: name: test_ashr -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: 
[[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_ASHR [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_ashr(i32 %arg1, i32 %arg2) { %res = ashr i32 %arg1, %arg2 ret i32 %res } ; CHECK-LABEL: name: test_sdiv -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SDIV [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_sdiv(i32 %arg1, i32 %arg2) { %res = sdiv i32 %arg1, %arg2 ret i32 %res } ; CHECK-LABEL: name: test_udiv -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_UDIV [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_udiv(i32 %arg1, i32 %arg2) { %res = udiv i32 %arg1, %arg2 ret i32 %res } ; CHECK-LABEL: name: test_srem -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SREM [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_srem(i32 %arg1, i32 %arg2) { %res = srem i32 %arg1, %arg2 
ret i32 %res } ; CHECK-LABEL: name: test_urem -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_UREM [[ARG1]], [[ARG2]] -; CHECK-NEXT: %w0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %w0 +; CHECK-NEXT: $w0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $w0 define i32 @test_urem(i32 %arg1, i32 %arg2) { %res = urem i32 %arg1, %arg2 ret i32 %res } ; CHECK-LABEL: name: test_constant_null -; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 -; CHECK: %x0 = COPY [[NULL]] +; CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZERO]] +; CHECK: $x0 = COPY [[NULL]] define i8* @test_constant_null() { ret i8* null } ; CHECK-LABEL: name: test_struct_memops -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 4) -; CHECK: G_STORE [[VAL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 4) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[VAL1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64) +; CHECK: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr + 4) +; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64) +; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store 4 into %ir.addr + 4) define void @test_struct_memops({ i8, i32 }* %addr) { %val = load { i8, i32 }, { i8, i32 }* %addr store { i8, i32 } %val, { i8, i32 }* %addr @@ -717,7 +736,7 @@ define void @test_struct_memops({ i8, i32 }* %addr) { } ; CHECK-LABEL: name: test_i1_memops -; CHECK: [[ADDR:%[0-9]+]]:_(p0) 
= COPY %x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s1) = G_LOAD [[ADDR]](p0) :: (load 1 from %ir.addr) ; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store 1 into %ir.addr) define void @test_i1_memops(i1* %addr) { @@ -727,9 +746,9 @@ define void @test_i1_memops(i1* %addr) { } ; CHECK-LABEL: name: int_comparison -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LHS]](s32), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0) define void @int_comparison(i32 %a, i32 %b, i1* %addr) { @@ -739,9 +758,9 @@ define void @int_comparison(i32 %a, i32 %b, i1* %addr) { } ; CHECK-LABEL: name: ptr_comparison -; CHECK: [[LHS:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[RHS:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[RHS:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LHS]](p0), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0) define void @ptr_comparison(i8* %a, i8* %b, i1* %addr) { @@ -751,69 +770,69 @@ define void @ptr_comparison(i8* %a, i8* %b, i1* %addr) { } ; CHECK-LABEL: name: test_fadd -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FADD [[ARG1]], [[ARG2]] -; CHECK-NEXT: %s0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %s0 +; CHECK-NEXT: $s0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $s0 define float @test_fadd(float %arg1, float %arg2) { %res = fadd float %arg1, %arg2 ret float %res } ; CHECK-LABEL: name: test_fsub -; CHECK: 
[[ARG1:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FSUB [[ARG1]], [[ARG2]] -; CHECK-NEXT: %s0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %s0 +; CHECK-NEXT: $s0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $s0 define float @test_fsub(float %arg1, float %arg2) { %res = fsub float %arg1, %arg2 ret float %res } ; CHECK-LABEL: name: test_fmul -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FMUL [[ARG1]], [[ARG2]] -; CHECK-NEXT: %s0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %s0 +; CHECK-NEXT: $s0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $s0 define float @test_fmul(float %arg1, float %arg2) { %res = fmul float %arg1, %arg2 ret float %res } ; CHECK-LABEL: name: test_fdiv -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FDIV [[ARG1]], [[ARG2]] -; CHECK-NEXT: %s0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %s0 +; CHECK-NEXT: $s0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit $s0 define float @test_fdiv(float %arg1, float %arg2) { %res = fdiv float %arg1, %arg2 ret float %res } ; CHECK-LABEL: name: test_frem -; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FREM [[ARG1]], [[ARG2]] -; CHECK-NEXT: %s0 = COPY [[RES]] -; CHECK-NEXT: RET_ReallyLR implicit %s0 +; CHECK-NEXT: $s0 = COPY [[RES]] +; CHECK-NEXT: RET_ReallyLR implicit 
$s0 define float @test_frem(float %arg1, float %arg2) { %res = frem float %arg1, %arg2 ret float %res } ; CHECK-LABEL: name: test_sadd_overflow -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SADDO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -822,15 +841,15 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_uadd_overflow -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UADDE [[LHS]], [[RHS]], [[ZERO]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), 
[[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -839,14 +858,14 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_ssub_overflow -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SSUBO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -855,15 +874,15 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { } ; CHECK-LABEL: name: test_usub_overflow -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: 
[[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_USUBE [[LHS]], [[RHS]], [[ZERO]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -872,14 +891,14 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { } ; CHECK-LABEL: name: test_smul_overflow -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SMULO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) 
declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -888,14 +907,14 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_umul_overflow -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UMULO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 -; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -904,9 +923,18 @@ define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_extractvalue -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT]](s128), 64 -; CHECK: %w0 = COPY [[RES]] +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 
4, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: $w0 = COPY [[LD3]](s32) %struct.nested = type {i8, { i8, i32 }, i32} define i32 @test_extractvalue(%struct.nested* %addr) { %struct = load %struct.nested, %struct.nested* %addr @@ -915,9 +943,22 @@ define i32 @test_extractvalue(%struct.nested* %addr) { } ; CHECK-LABEL: name: test_extractvalue_agg -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 32 -; CHECK: G_STORE [[RES]] +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: %1:_(p0) = COPY $x1 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: G_STORE [[LD2]](s8), %1(p0) :: (store 1 into %ir.addr2, align 4) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %1, [[CST4]](s64) +; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store 4 into %ir.addr2 + 4) define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %struct = load %struct.nested, %struct.nested* %addr %res 
= extractvalue %struct.nested %struct, 1 @@ -926,10 +967,28 @@ define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { } ; CHECK-LABEL: name: test_insertvalue -; CHECK: [[VAL:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[NEWSTRUCT:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[VAL]](s32), 64 -; CHECK: G_STORE [[NEWSTRUCT]](s128), +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: %1:_(s32) = COPY $w1 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: G_STORE [[LD1]](s8), %0(p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %0, [[CST4]](s64) +; CHECK: G_STORE [[LD2]](s8), [[GEP4]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP %0, [[CST5]](s64) +; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store 4 into %ir.addr + 8) +; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP %0, [[CST6]](s64) +; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 12) define void @test_insertvalue(%struct.nested* %addr, i32 %val) { %struct = load %struct.nested, %struct.nested* %addr %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1 @@ -939,29 +998,49 @@ define void 
@test_insertvalue(%struct.nested* %addr, i32 %val) { define [1 x i64] @test_trivial_insert([1 x i64] %s, i64 %val) { ; CHECK-LABEL: name: test_trivial_insert -; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]]:_(s64) = COPY %x1 -; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY [[VAL]](s64) -; CHECK: %x0 = COPY [[RES]] +; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[VAL:%[0-9]+]]:_(s64) = COPY $x1 +; CHECK: $x0 = COPY [[VAL]] %res = insertvalue [1 x i64] %s, i64 %val, 0 ret [1 x i64] %res } define [1 x i8*] @test_trivial_insert_ptr([1 x i8*] %s, i8* %val) { ; CHECK-LABEL: name: test_trivial_insert_ptr -; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[VAL]](p0) -; CHECK: %x0 = COPY [[RES]] +; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: $x0 = COPY [[VAL]] %res = insertvalue [1 x i8*] %s, i8* %val, 0 ret [1 x i8*] %res } ; CHECK-LABEL: name: test_insertvalue_agg -; CHECK: [[SMALLSTRUCT:%[0-9]+]]:_(s64) = G_LOAD -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[SMALLSTRUCT]](s64), 32 -; CHECK: G_STORE [[RES]](s128) +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: %1:_(p0) = COPY $x1 +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %1(p0) :: (load 1 from %ir.addr2, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %1, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr2 + 4) +; CHECK: [[LD3:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[GEP2]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: 
[[LD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP %0, [[CST4]](s64) +; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: G_STORE [[LD3]](s8), %0(p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP %0, [[CST5]](s64) +; CHECK: G_STORE [[LD1]](s8), [[GEP5]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP %0, [[CST6]](s64) +; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 8) +; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_GEP %0, [[CST7]](s64) +; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store 4 into %ir.addr + 12) define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %smallstruct = load {i8, i32}, {i8, i32}* %addr2 %struct = load %struct.nested, %struct.nested* %addr @@ -971,48 +1050,48 @@ define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { } ; CHECK-LABEL: name: test_select -; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_TRUNC [[TST_C]] -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] -; CHECK: %w0 = COPY [[RES]] +; CHECK: $w0 = COPY [[RES]] define i32 @test_select(i1 %tst, i32 %lhs, i32 %rhs) { %res = select i1 %tst, i32 %lhs, i32 %rhs ret i32 %res } ; CHECK-LABEL: name: test_select_ptr -; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_TRUNC [[TST_C]] -; CHECK: 
[[LHS:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[RHS:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[RHS:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[RES:%[0-9]+]]:_(p0) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] -; CHECK: %x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[RES]] define i8* @test_select_ptr(i1 %tst, i8* %lhs, i8* %rhs) { %res = select i1 %tst, i8* %lhs, i8* %rhs ret i8* %res } ; CHECK-LABEL: name: test_select_vec -; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_TRUNC [[TST_C]] -; CHECK: [[LHS:%[0-9]+]]:_(<4 x s32>) = COPY %q0 -; CHECK: [[RHS:%[0-9]+]]:_(<4 x s32>) = COPY %q1 +; CHECK: [[LHS:%[0-9]+]]:_(<4 x s32>) = COPY $q0 +; CHECK: [[RHS:%[0-9]+]]:_(<4 x s32>) = COPY $q1 ; CHECK: [[RES:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] -; CHECK: %q0 = COPY [[RES]] +; CHECK: $q0 = COPY [[RES]] define <4 x i32> @test_select_vec(i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs) { %res = select i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs ret <4 x i32> %res } ; CHECK-LABEL: name: test_vselect_vec -; CHECK: [[TST32:%[0-9]+]]:_(<4 x s32>) = COPY %q0 -; CHECK: [[LHS:%[0-9]+]]:_(<4 x s32>) = COPY %q1 -; CHECK: [[RHS:%[0-9]+]]:_(<4 x s32>) = COPY %q2 +; CHECK: [[TST32:%[0-9]+]]:_(<4 x s32>) = COPY $q0 +; CHECK: [[LHS:%[0-9]+]]:_(<4 x s32>) = COPY $q1 +; CHECK: [[RHS:%[0-9]+]]:_(<4 x s32>) = COPY $q2 ; CHECK: [[TST:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[TST32]](<4 x s32>) ; CHECK: [[RES:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TST]](<4 x s1>), [[LHS]], [[RHS]] -; CHECK: %q0 = COPY [[RES]] +; CHECK: $q0 = COPY [[RES]] define <4 x i32> @test_vselect_vec(<4 x i32> %tst32, <4 x i32> %lhs, <4 x i32> %rhs) { %tst = trunc <4 x i32> %tst32 to <4 x i1> %res = select <4 x i1> %tst, <4 x i32> %lhs, <4 x i32> %rhs @@ -1020,10 +1099,10 @@ define <4 x i32> @test_vselect_vec(<4 x i32> %tst32, <4 x i32> %lhs, <4 x i32> % } ; CHECK-LABEL: name: test_fptosi -; CHECK: [[FPADDR:%[0-9]+]]:_(p0) = 
COPY %x0 +; CHECK: [[FPADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[FP:%[0-9]+]]:_(s32) = G_LOAD [[FPADDR]](p0) ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOSI [[FP]](s32) -; CHECK: %x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[RES]] define i64 @test_fptosi(float* %fp.addr) { %fp = load float, float* %fp.addr %res = fptosi float %fp to i64 @@ -1031,10 +1110,10 @@ define i64 @test_fptosi(float* %fp.addr) { } ; CHECK-LABEL: name: test_fptoui -; CHECK: [[FPADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[FPADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[FP:%[0-9]+]]:_(s32) = G_LOAD [[FPADDR]](p0) ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOUI [[FP]](s32) -; CHECK: %x0 = COPY [[RES]] +; CHECK: $x0 = COPY [[RES]] define i64 @test_fptoui(float* %fp.addr) { %fp = load float, float* %fp.addr %res = fptoui float %fp to i64 @@ -1042,8 +1121,8 @@ define i64 @test_fptoui(float* %fp.addr) { } ; CHECK-LABEL: name: test_sitofp -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[FP:%[0-9]+]]:_(s64) = G_SITOFP [[IN]](s32) ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0) define void @test_sitofp(double* %addr, i32 %in) { @@ -1053,8 +1132,8 @@ define void @test_sitofp(double* %addr, i32 %in) { } ; CHECK-LABEL: name: test_uitofp -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[FP:%[0-9]+]]:_(s64) = G_UITOFP [[IN]](s32) ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0) define void @test_uitofp(double* %addr, i32 %in) { @@ -1064,25 +1143,25 @@ define void @test_uitofp(double* %addr, i32 %in) { } ; CHECK-LABEL: name: test_fpext -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPEXT [[IN]](s32) -; CHECK: %d0 = COPY [[RES]] +; CHECK: $d0 = COPY [[RES]] define double @test_fpext(float %in) { 
%res = fpext float %in to double ret double %res } ; CHECK-LABEL: name: test_fptrunc -; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FPTRUNC [[IN]](s64) -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] define float @test_fptrunc(double %in) { %res = fptrunc double %in to float ret float %res } ; CHECK-LABEL: name: test_constant_float -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[TMP:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+00 ; CHECK: G_STORE [[TMP]](s32), [[ADDR]](p0) define void @test_constant_float(float* %addr) { @@ -1091,9 +1170,9 @@ define void @test_constant_float(float* %addr) { } ; CHECK-LABEL: name: float_comparison -; CHECK: [[LHSADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[RHSADDR:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[BOOLADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHSADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[RHSADDR:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[BOOLADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[LHS:%[0-9]+]]:_(s32) = G_LOAD [[LHSADDR]](p0) ; CHECK: [[RHS:%[0-9]+]]:_(s32) = G_LOAD [[RHSADDR]](p0) ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[LHS]](s32), [[RHS]] @@ -1124,7 +1203,7 @@ define i1 @trivial_float_comparison(double %a, double %b) { define i32* @test_global() { ; CHECK-LABEL: name: test_global ; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var{{$}} -; CHECK: %x0 = COPY [[TMP]](p0) +; CHECK: $x0 = COPY [[TMP]](p0) ret i32* @var } @@ -1133,7 +1212,7 @@ define i32* @test_global() { define i32 addrspace(42)* @test_global_addrspace() { ; CHECK-LABEL: name: test_global ; CHECK: [[TMP:%[0-9]+]]:_(p42) = G_GLOBAL_VALUE @var1{{$}} -; CHECK: %x0 = COPY [[TMP]](p42) +; CHECK: $x0 = COPY [[TMP]](p42) ret i32 addrspace(42)* @var1 } @@ -1142,52 +1221,52 @@ define i32 addrspace(42)* @test_global_addrspace() { define void()* @test_global_func() { ; CHECK-LABEL: name: test_global_func ; CHECK: 
[[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @allocai64{{$}} -; CHECK: %x0 = COPY [[TMP]](p0) +; CHECK: $x0 = COPY [[TMP]](p0) ret void()* @allocai64 } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy -; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY %x2 -; CHECK: %x0 = COPY [[DST]] -; CHECK: %x1 = COPY [[SRC]] -; CHECK: %x2 = COPY [[SIZE]] -; CHECK: BL $memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0) +; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 +; CHECK: $x0 = COPY [[DST]] +; CHECK: $x1 = COPY [[SRC]] +; CHECK: $x2 = COPY [[SIZE]] +; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) +declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1) define void @test_memmove(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memmove -; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY %x2 -; CHECK: %x0 = COPY [[DST]] -; CHECK: %x1 = COPY [[SRC]] -; CHECK: %x2 = COPY [[SIZE]] -; CHECK: BL $memmove, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0) +; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY 
$x2 +; CHECK: $x0 = COPY [[DST]] +; CHECK: $x1 = COPY [[SRC]] +; CHECK: $x2 = COPY [[SIZE]] +; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32 %align, i1 %volatile) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) define void @test_memset(i8* %dst, i8 %val, i64 %size) { ; CHECK-LABEL: name: test_memset -; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]] -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY %x2 -; CHECK: %x0 = COPY [[DST]] +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 +; CHECK: $x0 = COPY [[DST]] ; CHECK: [[SRC_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[SRC]] -; CHECK: %w1 = COPY [[SRC_TMP]] -; CHECK: %x2 = COPY [[SIZE]] -; CHECK: BL $memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2 - call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i32 1, i1 0) +; CHECK: $w1 = COPY [[SRC_TMP]] +; CHECK: $x2 = COPY [[SIZE]] +; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $w1, implicit $x2 + call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0) ret void } @@ -1195,8 +1274,8 @@ declare i64 @llvm.objectsize.i64(i8*, i1) declare i32 @llvm.objectsize.i32(i8*, i1) define void @test_objectsize(i8* %addr0, i8* %addr1) { ; CHECK-LABEL: name: test_objectsize -; CHECK: [[ADDR0:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[ADDR0:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: {{%[0-9]+}}:_(s64) = G_CONSTANT i64 -1 ; CHECK: {{%[0-9]+}}:_(s64) = G_CONSTANT i64 0 ; CHECK: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 -1 @@ -1210,7 +1289,7 
@@ define void @test_objectsize(i8* %addr0, i8* %addr1) { define void @test_large_const(i128* %addr) { ; CHECK-LABEL: name: test_large_const -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s128) = G_CONSTANT i128 42 ; CHECK: G_STORE [[VAL]](s128), [[ADDR]](p0) store i128 42, i128* %addr @@ -1245,7 +1324,7 @@ define void @test_va_end(i8* %list) { define void @test_va_arg(i8* %list) { ; CHECK-LABEL: test_va_arg -; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: G_VAARG [[LIST]](p0), 8 ; CHECK: G_VAARG [[LIST]](p0), 1 ; CHECK: G_VAARG [[LIST]](p0), 16 @@ -1259,10 +1338,10 @@ define void @test_va_arg(i8* %list) { declare float @llvm.pow.f32(float, float) define float @test_pow_intrin(float %l, float %r) { ; CHECK-LABEL: name: test_pow_intrin -; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FPOW [[LHS]], [[RHS]] -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] %res = call float @llvm.pow.f32(float %l, float %r) ret float %res } @@ -1270,11 +1349,11 @@ define float @test_pow_intrin(float %l, float %r) { declare float @llvm.fma.f32(float, float, float) define float @test_fma_intrin(float %a, float %b, float %c) { ; CHECK-LABEL: name: test_fma_intrin -; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK: [[B:%[0-9]+]]:_(s32) = COPY %s1 -; CHECK: [[C:%[0-9]+]]:_(s32) = COPY %s2 +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK: [[B:%[0-9]+]]:_(s32) = COPY $s1 +; CHECK: [[C:%[0-9]+]]:_(s32) = COPY $s2 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FMA [[A]], [[B]], [[C]] -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] %res = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %res } @@ -1282,9 +1361,9 @@ define float @test_fma_intrin(float %a, float %b, float %c) { declare float 
@llvm.exp.f32(float) define float @test_exp_intrin(float %a) { ; CHECK-LABEL: name: test_exp_intrin -; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FEXP [[A]] -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] %res = call float @llvm.exp.f32(float %a) ret float %res } @@ -1292,9 +1371,9 @@ define float @test_exp_intrin(float %a) { declare float @llvm.exp2.f32(float) define float @test_exp2_intrin(float %a) { ; CHECK-LABEL: name: test_exp2_intrin -; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FEXP2 [[A]] -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] %res = call float @llvm.exp2.f32(float %a) ret float %res } @@ -1302,9 +1381,9 @@ define float @test_exp2_intrin(float %a) { declare float @llvm.log.f32(float) define float @test_log_intrin(float %a) { ; CHECK-LABEL: name: test_log_intrin -; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FLOG [[A]] -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] %res = call float @llvm.log.f32(float %a) ret float %res } @@ -1312,12 +1391,23 @@ define float @test_log_intrin(float %a) { declare float @llvm.log2.f32(float) define float @test_log2_intrin(float %a) { ; CHECK-LABEL: name: test_log2_intrin -; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FLOG2 [[A]] -; CHECK: %s0 = COPY [[RES]] +; CHECK: $s0 = COPY [[RES]] %res = call float @llvm.log2.f32(float %a) ret float %res } + +declare float @llvm.fabs.f32(float) +define float @test_fabs_intrin(float %a) { +; CHECK-LABEL: name: test_fabs_intrin +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FABS [[A]] +; CHECK: $s0 = COPY [[RES]] + %res = call float @llvm.fabs.f32(float %a) + ret float %res +} + declare void @llvm.lifetime.start.p0i8(i64, i8*) 
declare void @llvm.lifetime.end.p0i8(i64, i8*) define void @test_lifetime_intrin() { @@ -1331,7 +1421,7 @@ define void @test_lifetime_intrin() { define void @test_load_store_atomics(i8* %addr) { ; CHECK-LABEL: name: test_load_store_atomics -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[V0:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load unordered 1 from %ir.addr) ; CHECK: G_STORE [[V0]](s8), [[ADDR]](p0) :: (store monotonic 1 into %ir.addr) ; CHECK: [[V1:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load acquire 1 from %ir.addr) @@ -1352,26 +1442,26 @@ define void @test_load_store_atomics(i8* %addr) { define float @test_fneg_f32(float %x) { ; CHECK-LABEL: name: test_fneg_f32 -; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FNEG [[ARG]] -; CHECK: %s0 = COPY [[RES]](s32) +; CHECK: $s0 = COPY [[RES]](s32) %neg = fsub float -0.000000e+00, %x ret float %neg } define double @test_fneg_f64(double %x) { ; CHECK-LABEL: name: test_fneg_f64 -; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FNEG [[ARG]] -; CHECK: %d0 = COPY [[RES]](s64) +; CHECK: $d0 = COPY [[RES]](s64) %neg = fsub double -0.000000e+00, %x ret double %neg } define void @test_trivial_inlineasm() { ; CHECK-LABEL: name: test_trivial_inlineasm -; CHECK: INLINEASM $wibble, 1 -; CHECK: INLINEASM $wibble, 0 +; CHECK: INLINEASM &wibble, 1 +; CHECK: INLINEASM &wibble, 0 call void asm sideeffect "wibble", ""() call void asm "wibble", ""() ret void @@ -1379,31 +1469,31 @@ define void @test_trivial_inlineasm() { define <2 x i32> @test_insertelement(<2 x i32> %vec, i32 %elt, i32 %idx){ ; CHECK-LABEL: name: test_insertelement -; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY %d0 -; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: 
[[ELT:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[RES:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[VEC]], [[ELT]](s32), [[IDX]](s32) -; CHECK: %d0 = COPY [[RES]](<2 x s32>) +; CHECK: $d0 = COPY [[RES]](<2 x s32>) %res = insertelement <2 x i32> %vec, i32 %elt, i32 %idx ret <2 x i32> %res } define i32 @test_extractelement(<2 x i32> %vec, i32 %idx) { ; CHECK-LABEL: name: test_extractelement -; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY %d0 -; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s32) -; CHECK: %w0 = COPY [[RES]](s32) +; CHECK: $w0 = COPY [[RES]](s32) %res = extractelement <2 x i32> %vec, i32 %idx ret i32 %res } define i32 @test_singleelementvector(i32 %elt){ ; CHECK-LABEL: name: test_singleelementvector -; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NOT: G_INSERT_VECTOR_ELT ; CHECK-NOT: G_EXTRACT_VECTOR_ELT -; CHECK: %w0 = COPY [[ELT]](s32) +; CHECK: $w0 = COPY [[ELT]](s32) %vec = insertelement <1 x i32> undef, i32 %elt, i32 0 %res = extractelement <1 x i32> %vec, i32 0 ret i32 %res @@ -1413,7 +1503,7 @@ define <2 x i32> @test_constantaggzerovector_v2i32() { ; CHECK-LABEL: name: test_constantaggzerovector_v2i32 ; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32) -; CHECK: %d0 = COPY [[VEC]](<2 x s32>) +; CHECK: $d0 = COPY [[VEC]](<2 x s32>) ret <2 x i32> zeroinitializer } @@ -1421,7 +1511,7 @@ define <2 x float> @test_constantaggzerovector_v2f32() { ; CHECK-LABEL: name: test_constantaggzerovector_v2f32 ; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32) -; CHECK: %d0 = COPY [[VEC]](<2 x s32>) +; CHECK: $d0 = COPY [[VEC]](<2 
x s32>) ret <2 x float> zeroinitializer } @@ -1439,7 +1529,7 @@ define <2 x i32> @test_constantdatavector_v2i32() { ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) -; CHECK: %d0 = COPY [[VEC]](<2 x s32>) +; CHECK: $d0 = COPY [[VEC]](<2 x s32>) ret <2 x i32> <i32 1, i32 2> } @@ -1461,7 +1551,7 @@ define <4 x i32> @test_constantdatavector_v4i32() { ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) -; CHECK: %q0 = COPY [[VEC]](<4 x s32>) +; CHECK: $q0 = COPY [[VEC]](<4 x s32>) ret <4 x i32> <i32 1, i32 2, i32 3, i32 4> } @@ -1470,13 +1560,13 @@ define <2 x double> @test_constantdatavector_v2f64() { ; CHECK: [[FC1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; CHECK: [[FC2:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s64>) = G_MERGE_VALUES [[FC1]](s64), [[FC2]](s64) -; CHECK: %q0 = COPY [[VEC]](<2 x s64>) +; CHECK: $q0 = COPY [[VEC]](<2 x s64>) ret <2 x double> <double 1.0, double 2.0> } define i32 @test_constantaggzerovector_v1s32(i32 %arg){ ; CHECK-LABEL: name: test_constantaggzerovector_v1s32 -; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NOT: G_MERGE_VALUES ; CHECK: G_ADD [[ARG]], [[C0]] @@ -1488,7 +1578,7 @@ define i32 @test_constantaggzerovector_v1s32(i32 %arg){ define i32 @test_constantdatavector_v1s32(i32 %arg){ ; CHECK-LABEL: name: test_constantdatavector_v1s32 -; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NOT: G_MERGE_VALUES ; CHECK: G_ADD [[ARG]], [[C1]] @@ -1501,21 +1591,21 @@ define i32 @test_constantdatavector_v1s32(i32 %arg){ declare 
ghccc float @different_call_conv_target(float %x) define float @test_different_call_conv_target(float %x) { ; CHECK-LABEL: name: test_different_call_conv -; CHECK: [[X:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK: %s8 = COPY [[X]] -; CHECK: BL @different_call_conv_target, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s8, implicit-def %s0 +; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $s0 +; CHECK: $s8 = COPY [[X]] +; CHECK: BL @different_call_conv_target, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s8, implicit-def $s0 %res = call ghccc float @different_call_conv_target(float %x) ret float %res } define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) { ; CHECK-LABEL: name: test_shufflevector_s32_v2s32 -; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32) ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) -; CHECK: %d0 = COPY [[VEC]](<2 x s32>) +; CHECK: $d0 = COPY [[VEC]](<2 x s32>) %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 %res = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer ret <2 x i32> %res @@ -1523,11 +1613,11 @@ define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) { define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_s32 -; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[C1]](s32) -; CHECK: %w0 = COPY [[RES]](s32) +; CHECK: $w0 = COPY [[RES]](s32) %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <1 x i32> <i32 1> %res = 
extractelement <1 x i32> %vec, i32 0 ret i32 %res @@ -1535,20 +1625,20 @@ define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) { define <2 x i32> @test_shufflevector_v2s32_v2s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32 -; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32) ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) -; CHECK: %d0 = COPY [[VEC]](<2 x s32>) +; CHECK: $d0 = COPY [[VEC]](<2 x s32>) %res = shufflevector <2 x i32> %arg, <2 x i32> undef, <2 x i32> <i32 1, i32 0> ret <2 x i32> %res } define i32 @test_shufflevector_v2s32_v3s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32 -; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1562,28 +1652,28 @@ define i32 @test_shufflevector_v2s32_v3s32(<2 x i32> %arg) { define <4 x i32> @test_shufflevector_v2s32_v4s32(<2 x i32> %arg1, <2 x i32> %arg2) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32 -; CHECK: [[ARG1:%[0-9]+]]:_(<2 x s32>) = COPY %d0 -; CHECK: [[ARG2:%[0-9]+]]:_(<2 x s32>) = COPY %d1 +; CHECK: [[ARG1:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[ARG2:%[0-9]+]]:_(<2 x s32>) = COPY $d1 ; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[MASK:%[0-9]+]]:_(<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C1]](s32), [[C2]](s32), 
[[C3]](s32) ; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[ARG1]](<2 x s32>), [[ARG2]], [[MASK]](<4 x s32>) -; CHECK: %q0 = COPY [[VEC]](<4 x s32>) +; CHECK: $q0 = COPY [[VEC]](<4 x s32>) %res = shufflevector <2 x i32> %arg1, <2 x i32> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x i32> %res } define <2 x i32> @test_shufflevector_v4s32_v2s32(<4 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32 -; CHECK: [[ARG:%[0-9]+]]:_(<4 x s32>) = COPY %q0 +; CHECK: [[ARG:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-DAG: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32) ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) -; CHECK: %d0 = COPY [[VEC]](<2 x s32>) +; CHECK: $d0 = COPY [[VEC]](<2 x s32>) %res = shufflevector <4 x i32> %arg, <4 x i32> undef, <2 x i32> <i32 1, i32 3> ret <2 x i32> %res } @@ -1591,8 +1681,8 @@ define <2 x i32> @test_shufflevector_v4s32_v2s32(<4 x i32> %arg) { define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) { ; CHECK-LABEL: name: test_shufflevector_v8s8_v16s8 -; CHECK: [[ARG1:%[0-9]+]]:_(<8 x s8>) = COPY %d0 -; CHECK: [[ARG2:%[0-9]+]]:_(<8 x s8>) = COPY %d1 +; CHECK: [[ARG1:%[0-9]+]]:_(<8 x s8>) = COPY $d0 +; CHECK: [[ARG2:%[0-9]+]]:_(<8 x s8>) = COPY $d1 ; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -1611,7 +1701,7 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; CHECK: [[MASK:%[0-9]+]]:_(<16 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C8]](s32), [[C1]](s32), [[C9]](s32), [[C2]](s32), [[C10]](s32), [[C3]](s32), [[C11]](s32), [[C4]](s32), [[C12]](s32), 
[[C5]](s32), [[C13]](s32), [[C6]](s32), [[C14]](s32), [[C7]](s32), [[C15]](s32) ; CHECK: [[VEC:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[ARG1]](<8 x s8>), [[ARG2]], [[MASK]](<16 x s32>) -; CHECK: %q0 = COPY [[VEC]](<16 x s8>) +; CHECK: $q0 = COPY [[VEC]](<16 x s8>) %res = shufflevector <8 x i8> %arg1, <8 x i8> %arg2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ret <16 x i8> %res } @@ -1620,14 +1710,14 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) ; CHECK: [[UNDEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK: [[F:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 ; CHECK: [[M:%[0-9]+]]:_(<4 x s16>) = G_MERGE_VALUES [[UNDEF]](s16), [[UNDEF]](s16), [[UNDEF]](s16), [[F]](s16) -; CHECK: %d0 = COPY [[M]](<4 x s16>) +; CHECK: $d0 = COPY [[M]](<4 x s16>) define <4 x half> @test_constant_vector() { ret <4 x half> <half undef, half undef, half undef, half 0xH3C00> } define i32 @test_target_mem_intrinsic(i32* %addr) { ; CHECK-LABEL: name: test_target_mem_intrinsic -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr) ; CHECK: G_TRUNC [[VAL]](s64) %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) @@ -1649,3 +1739,411 @@ entry: %val = load %zerosize_type, %zerosize_type* %ptr, align 4 ret %zerosize_type %in } + + +define i64 @test_phi_loop(i32 %n) { +; CHECK-LABEL: name: test_phi_loop +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[CST2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + +; CHECK: [[PN1:%[0-9]+]]:_(s32) = G_PHI [[ARG1]](s32), %bb.1, [[SUB:%[0-9]+]](s32), %bb.2 +; CHECK: [[PN2:%[0-9]+]]:_(s64) = G_PHI [[CST3]](s64), %bb.1, 
[[PN3:%[0-9]+]](s64), %bb.2 +; CHECK: [[PN3]]:_(s64) = G_PHI [[CST4]](s64), %bb.1, [[ADD:%[0-9]+]](s64), %bb.2 +; CHECK: [[ADD]]:_(s64) = G_ADD [[PN2]], [[PN3]] +; CHECK: [[SUB]]:_(s32) = G_SUB [[PN1]], [[CST1]] +; CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PN1]](s32), [[CST2]] +; CHECK: G_BRCOND [[CMP]](s1), %bb.3 +; CHECK: G_BR %bb.2 + +; CHECK: $x0 = COPY [[PN2]](s64) +; CHECK: RET_ReallyLR implicit $x0 +entry: + br label %loop + +loop: + %counter = phi i32 [ %n, %entry ], [ %counter.dec, %loop ] + %elem = phi { i64, i64 } [ { i64 0, i64 1 }, %entry ], [ %updated, %loop ] + %prev = extractvalue { i64, i64 } %elem, 0 + %curr = extractvalue { i64, i64 } %elem, 1 + %next = add i64 %prev, %curr + %shifted = insertvalue { i64, i64 } %elem, i64 %curr, 0 + %updated = insertvalue { i64, i64 } %shifted, i64 %next, 1 + %counter.dec = sub i32 %counter, 1 + %cond = icmp sle i32 %counter, 0 + br i1 %cond, label %exit, label %loop + +exit: + %res = extractvalue { i64, i64 } %elem, 0 + ret i64 %res +} + +define void @test_phi_diamond({ i8, i16, i32 }* %a.ptr, { i8, i16, i32 }* %b.ptr, i1 %selector, { i8, i16, i32 }* %dst) { +; CHECK-LABEL: name: test_phi_diamond +; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[ARG2:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[ARG3:%[0-9]+]]:_(s32) = COPY $w2 +; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ARG3]](s32) +; CHECK: [[ARG4:%[0-9]+]]:_(p0) = COPY $x3 +; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2 +; CHECK: G_BR %bb.3 + +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load 1 from %ir.a.ptr, align 4) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ARG1]], [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load 2 from %ir.a.ptr + 2) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ARG1]], [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.a.ptr + 4) +; CHECK: G_BR %bb.4 + 
+; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load 1 from %ir.b.ptr, align 4) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[ARG2]], [[CST3]](s64) +; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load 2 from %ir.b.ptr + 2) +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[ARG2]], [[CST4]](s64) +; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.b.ptr + 4) + +; CHECK: [[PN1:%[0-9]+]]:_(s8) = G_PHI [[LD1]](s8), %bb.2, [[LD4]](s8), %bb.3 +; CHECK: [[PN2:%[0-9]+]]:_(s16) = G_PHI [[LD2]](s16), %bb.2, [[LD5]](s16), %bb.3 +; CHECK: [[PN3:%[0-9]+]]:_(s32) = G_PHI [[LD3]](s32), %bb.2, [[LD6]](s32), %bb.3 +; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store 1 into %ir.dst, align 4) +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[ARG4]], [[CST5]](s64) +; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store 2 into %ir.dst + 2) +; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[ARG4]], [[CST6]](s64) +; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store 4 into %ir.dst + 4) +; CHECK: RET_ReallyLR + +entry: + br i1 %selector, label %store.a, label %store.b + +store.a: + %a = load { i8, i16, i32 }, { i8, i16, i32 }* %a.ptr + br label %join + +store.b: + %b = load { i8, i16, i32 }, { i8, i16, i32 }* %b.ptr + br label %join + +join: + %v = phi { i8, i16, i32 } [ %a, %store.a ], [ %b, %store.b ] + store { i8, i16, i32 } %v, { i8, i16, i32 }* %dst + ret void +} + +%agg.inner.inner = type {i64, i64} +%agg.inner = type {i16, i8, %agg.inner.inner } +%agg.nested = type {i32, i32, %agg.inner, i32} + +define void @test_nested_aggregate_const(%agg.nested *%ptr) { +; CHECK-LABEL: name: test_nested_aggregate_const +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[CST2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 +; 
CHECK: [[CST3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 +; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 +; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 +; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store 4 into %ir.ptr, align 8) +; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST7]](s64) +; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store 4 into %ir.ptr + 4) +; CHECK: [[CST8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST8]](s64) +; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store 2 into %ir.ptr + 8, align 8) +; CHECK: [[CST9:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST9]](s64) +; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store 1 into %ir.ptr + 10, align 2) +; CHECK: [[CST10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST10]](s64) +; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store 8 into %ir.ptr + 16) +; CHECK: [[CST11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 +; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST11]](s64) +; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store 8 into %ir.ptr + 24) +; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 +; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[CST12]](s64) +; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store 4 into %ir.ptr + 32, align 8) + store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, %agg.nested *%ptr + ret void +} + +define i1 @return_i1_zext() { +; AAPCS ABI says that booleans can only be 1 or 0, so we need to zero-extend. 
+; CHECK-LABEL: name: return_i1_zext +; CHECK: [[CST:%[0-9]+]]:_(s1) = G_CONSTANT i1 true +; CHECK: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[CST]](s1) +; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) +; CHECK: $w0 = COPY [[ANYEXT]](s32) +; CHECK: RET_ReallyLR implicit $w0 + ret i1 true +} + +; Try one cmpxchg +define i32 @test_atomic_cmpxchg_1(i32* %addr) { +; CHECK-LABEL: name: test_atomic_cmpxchg_1 +; CHECK: bb.1.entry: +; CHECK-NEXT: successors: %bb.{{[^)]+}} +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[OLDVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: bb.2.repeat: +; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic 4 on %ir.addr) +; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 +; CHECK-NEXT: G_BR %bb.2 +; CHECK: bb.3.done: +entry: + br label %repeat +repeat: + %val_success = cmpxchg i32* %addr, i32 0, i32 1 monotonic monotonic + %value_loaded = extractvalue { i32, i1 } %val_success, 0 + %success = extractvalue { i32, i1 } %val_success, 1 + br i1 %success, label %done, label %repeat +done: + ret i32 %value_loaded +} + +; Try one cmpxchg with a small type and high atomic ordering. 
+define i16 @test_atomic_cmpxchg_2(i16* %addr) { +; CHECK-LABEL: name: test_atomic_cmpxchg_2 +; CHECK: bb.1.entry: +; CHECK-NEXT: successors: %bb.2({{[^)]+}}) +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[OLDVAL:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 +; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 +; CHECK: bb.2.repeat: +; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s16), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst seq_cst 2 on %ir.addr) +; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 +; CHECK-NEXT: G_BR %bb.2 +; CHECK: bb.3.done: +entry: + br label %repeat +repeat: + %val_success = cmpxchg i16* %addr, i16 0, i16 1 seq_cst seq_cst + %value_loaded = extractvalue { i16, i1 } %val_success, 0 + %success = extractvalue { i16, i1 } %val_success, 1 + br i1 %success, label %done, label %repeat +done: + ret i16 %value_loaded +} + +; Try one cmpxchg where the success order and failure order differ. 
+define i64 @test_atomic_cmpxchg_3(i64* %addr) { +; CHECK-LABEL: name: test_atomic_cmpxchg_3 +; CHECK: bb.1.entry: +; CHECK-NEXT: successors: %bb.2({{[^)]+}}) +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[OLDVAL:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: bb.2.repeat: +; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s64), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst acquire 8 on %ir.addr) +; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 +; CHECK-NEXT: G_BR %bb.2 +; CHECK: bb.3.done: +entry: + br label %repeat +repeat: + %val_success = cmpxchg i64* %addr, i64 0, i64 1 seq_cst acquire + %value_loaded = extractvalue { i64, i1 } %val_success, 0 + %success = extractvalue { i64, i1 } %val_success, 1 + br i1 %success, label %done, label %repeat +done: + ret i64 %value_loaded +} + +; Try a monotonic atomicrmw xchg +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. +define i32 @test_atomicrmw_xchg(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_xchg +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XCHG [[ADDR]](p0), [[VAL]] :: (load store monotonic 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw xchg i256* %addr, i256 1 monotonic + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an acquire atomicrmw add +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. 
+define i32 @test_atomicrmw_add(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_add +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_ADD [[ADDR]](p0), [[VAL]] :: (load store acquire 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw add i256* %addr, i256 1 acquire + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try a release atomicrmw sub +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. +define i32 @test_atomicrmw_sub(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_sub +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_SUB [[ADDR]](p0), [[VAL]] :: (load store release 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw sub i256* %addr, i256 1 release + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an acq_rel atomicrmw and +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. 
+define i32 @test_atomicrmw_and(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_and +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_AND [[ADDR]](p0), [[VAL]] :: (load store acq_rel 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw and i256* %addr, i256 1 acq_rel + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw nand +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. +define i32 @test_atomicrmw_nand(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_nand +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_NAND [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw nand i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw or +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. 
+define i32 @test_atomicrmw_or(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_or +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_OR [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw or i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw xor +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. +define i32 @test_atomicrmw_xor(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_xor +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XOR [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw xor i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw min +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. 
+define i32 @test_atomicrmw_min(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_min +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw min i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw max +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. +define i32 @test_atomicrmw_max(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_max +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw max i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw unsigned min +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. 
+define i32 @test_atomicrmw_umin(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_umin +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw umin i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. + %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} + +; Try an seq_cst atomicrmw unsigned max +; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. +define i32 @test_atomicrmw_umax(i256* %addr) { +; CHECK-LABEL: name: test_atomicrmw_umax +; CHECK: bb.1 (%ir-block.{{[0-9]+}}): +; CHECK-NEXT: liveins: $x0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] + %oldval = atomicrmw umax i256* %addr, i256 1 seq_cst + ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this + ; test so work around it by truncating to i32 for now. 
+ %oldval.trunc = trunc i256 %oldval to i32 + ret i32 %oldval.trunc +} diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir index a27cf2bea78c..d165a1168a59 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -1,5 +1,5 @@ -# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=FAST -# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -regbankselect-greedy -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY +# RUN: llc -O0 -run-pass=regbankselect %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=FAST +# RUN: llc -O0 -run-pass=regbankselect %s -regbankselect-greedy -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY --- | ; ModuleID = 'generic-virtual-registers-type-error.mir' @@ -122,10 +122,10 @@ registers: - { id: 1, class: _ } body: | bb.0.entry: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: defaultMapping ; CHECK: %1:gpr(s32) = G_ADD %0 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_ADD %0, %0 ... @@ -140,11 +140,11 @@ registers: - { id: 1, class: _ } body: | bb.0.entry: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: defaultMappingVector - ; CHECK: %0:fpr(<2 x s32>) = COPY %d0 + ; CHECK: %0:fpr(<2 x s32>) = COPY $d0 ; CHECK: %1:fpr(<2 x s32>) = G_ADD %0 - %0(<2 x s32>) = COPY %d0 + %0(<2 x s32>) = COPY $d0 %1(<2 x s32>) = G_ADD %0, %0 ... 
@@ -160,14 +160,14 @@ registers: - { id: 2, class: _ } body: | bb.0.entry: - liveins: %s0, %x0 + liveins: $s0, $x0 ; CHECK-LABEL: name: defaultMapping1Repair - ; CHECK: %0:fpr(s32) = COPY %s0 - ; CHECK-NEXT: %1:gpr(s32) = COPY %w0 + ; CHECK: %0:fpr(s32) = COPY $s0 + ; CHECK-NEXT: %1:gpr(s32) = COPY $w0 ; CHECK-NEXT: %3:gpr(s32) = COPY %0 ; CHECK-NEXT: %2:gpr(s32) = G_ADD %3, %1 - %0(s32) = COPY %s0 - %1(s32) = COPY %w0 + %0(s32) = COPY $s0 + %1(s32) = COPY $w0 %2(s32) = G_ADD %0, %1 ... @@ -179,13 +179,13 @@ registers: - { id: 1, class: _ } body: | bb.0.entry: - liveins: %s0, %x0 + liveins: $s0, $x0 ; CHECK-LABEL: name: defaultMapping2Repairs - ; CHECK: %0:fpr(s32) = COPY %s0 + ; CHECK: %0:fpr(s32) = COPY $s0 ; CHECK-NEXT: %2:gpr(s32) = COPY %0 ; CHECK-NEXT: %3:gpr(s32) = COPY %0 ; CHECK-NEXT: %1:gpr(s32) = G_ADD %2, %3 - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_ADD %0, %0 ... @@ -201,12 +201,12 @@ registers: - { id: 1, class: fpr } body: | bb.0.entry: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: defaultMappingDefRepair - ; CHECK: %0:gpr(s32) = COPY %w0 + ; CHECK: %0:gpr(s32) = COPY $w0 ; CHECK-NEXT: %2:gpr(s32) = G_ADD %0, %0 ; CHECK-NEXT: %1:fpr(s32) = COPY %2 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_ADD %0, %0 ... 
@@ -231,12 +231,12 @@ registers: body: | bb.0.entry: successors: %bb.2.end, %bb.1.then - liveins: %x0, %x1, %w2 + liveins: $x0, $x1, $w2 - %0 = LDRWui killed %x0, 0 :: (load 4 from %ir.src) + %0 = LDRWui killed $x0, 0 :: (load 4 from %ir.src) %5(s32) = COPY %0 - %1(p0) = COPY %x1 - %2 = COPY %w2 + %1(p0) = COPY $x1 + %2 = COPY $w2 TBNZW killed %2, 0, %bb.2.end bb.1.then: @@ -259,14 +259,14 @@ registers: - { id: 2, class: _ } body: | bb.0.entry: - liveins: %w0, %s0 + liveins: $w0, $s0 ; CHECK-LABEL: name: defaultMappingUseRepairPhysReg - ; CHECK: %0:gpr(s32) = COPY %w0 - ; CHECK-NEXT: %1:fpr(s32) = COPY %s0 + ; CHECK: %0:gpr(s32) = COPY $w0 + ; CHECK-NEXT: %1:fpr(s32) = COPY $s0 ; CHECK-NEXT: %3:gpr(s32) = COPY %1 ; CHECK-NEXT: %2:gpr(s32) = G_ADD %0, %3 - %0(s32) = COPY %w0 - %1(s32) = COPY %s0 + %0(s32) = COPY $w0 + %1(s32) = COPY $s0 %2(s32) = G_ADD %0, %1 ... @@ -279,14 +279,14 @@ registers: - { id: 1, class: _ } body: | bb.0.entry: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: defaultMappingDefRepairPhysReg - ; CHECK: %0:gpr(s32) = COPY %w0 + ; CHECK: %0:gpr(s32) = COPY $w0 ; CHECK-NEXT: %1:gpr(s32) = G_ADD %0, %0 - ; CHECK-NEXT: %s0 = COPY %1 - %0(s32) = COPY %w0 + ; CHECK-NEXT: $s0 = COPY %1 + %0(s32) = COPY $w0 %1(s32) = G_ADD %0, %0 - %s0 = COPY %1 + $s0 = COPY %1 ... --- @@ -300,9 +300,9 @@ registers: - { id: 2, class: _ } body: | bb.0.entry: - liveins: %x0, %x1 - ; CHECK: %0:gpr(<2 x s32>) = COPY %x0 - ; CHECK-NEXT: %1:gpr(<2 x s32>) = COPY %x1 + liveins: $x0, $x1 + ; CHECK: %0:gpr(<2 x s32>) = COPY $x0 + ; CHECK-NEXT: %1:gpr(<2 x s32>) = COPY $x1 ; Fast mode tries to reuse the source of the copy for the destination. ; Now, the default mapping says that %0 and %1 need to be in FPR. @@ -314,8 +314,8 @@ body: | ; Greedy mode remapped the instruction on the GPR bank. ; GREEDY-NEXT: %2:gpr(<2 x s32>) = G_OR %0, %1 - %0(<2 x s32>) = COPY %x0 - %1(<2 x s32>) = COPY %x1 + %0(<2 x s32>) = COPY $x0 + %1(<2 x s32>) = COPY $x1 %2(<2 x s32>) = G_OR %0, %1 ... 
@@ -331,11 +331,11 @@ registers: - { id: 2, class: fpr } body: | bb.0.entry: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: greedyMappingOrWithConstraints - ; CHECK: %0:gpr(<2 x s32>) = COPY %x0 - ; CHECK-NEXT: %1:gpr(<2 x s32>) = COPY %x1 + ; CHECK: %0:gpr(<2 x s32>) = COPY $x0 + ; CHECK-NEXT: %1:gpr(<2 x s32>) = COPY $x1 ; Fast mode tries to reuse the source of the copy for the destination. ; Now, the default mapping says that %0 and %1 need to be in FPR. @@ -349,8 +349,8 @@ body: | ; GREEDY-NEXT: %3:gpr(<2 x s32>) = G_OR %0, %1 ; We need to keep %2 into FPR because we do not know anything about it. ; GREEDY-NEXT: %2:fpr(<2 x s32>) = COPY %3 - %0(<2 x s32>) = COPY %x0 - %1(<2 x s32>) = COPY %x1 + %0(<2 x s32>) = COPY $x0 + %1(<2 x s32>) = COPY $x1 %2(<2 x s32>) = G_OR %0, %1 ... @@ -366,17 +366,17 @@ registers: - { id: 1, class: gpr64 } body: | bb.0: - liveins: %x0 + liveins: $x0 - ; CHECK: %0:gpr64 = COPY %x0 + ; CHECK: %0:gpr64 = COPY $x0 ; CHECK-NEXT: %1:gpr64 = ADDXrr %0, %0 - ; CHECK-NEXT: %x0 = COPY %1 - ; CHECK-NEXT: RET_ReallyLR implicit %x0 + ; CHECK-NEXT: $x0 = COPY %1 + ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %0 = COPY %x0 + %0 = COPY $x0 %1 = ADDXrr %0, %0 - %x0 = COPY %1 - RET_ReallyLR implicit %x0 + $x0 = COPY %1 + RET_ReallyLR implicit $x0 ... --- @@ -404,13 +404,13 @@ registers: - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:gpr(s32) = COPY %w0 +# CHECK: %0:gpr(s32) = COPY $w0 # CHECK: %1:gpr(s32) = G_BITCAST %0 body: | bb.0: - liveins: %w0 + liveins: $w0 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_BITCAST %0 ... @@ -427,13 +427,13 @@ registers: - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:fpr(<2 x s16>) = COPY %s0 +# CHECK: %0:fpr(<2 x s16>) = COPY $s0 # CHECK: %1:fpr(<2 x s16>) = G_BITCAST %0 body: | bb.0: - liveins: %s0 + liveins: $s0 - %0(<2 x s16>) = COPY %s0 + %0(<2 x s16>) = COPY $s0 %1(<2 x s16>) = G_BITCAST %0 ... 
@@ -451,14 +451,14 @@ registers: - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:gpr(s32) = COPY %w0 +# CHECK: %0:gpr(s32) = COPY $w0 # FAST: %1:fpr(<2 x s16>) = G_BITCAST %0 # GREEDY: %1:gpr(<2 x s16>) = G_BITCAST %0 body: | bb.0: - liveins: %w0 + liveins: $w0 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(<2 x s16>) = G_BITCAST %0 ... @@ -470,14 +470,14 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:fpr(<2 x s16>) = COPY %s0 +# CHECK: %0:fpr(<2 x s16>) = COPY $s0 # FAST: %1:gpr(s32) = G_BITCAST %0 # GREEDY: %1:fpr(s32) = G_BITCAST %0 body: | bb.0: - liveins: %s0 + liveins: $s0 - %0(<2 x s16>) = COPY %s0 + %0(<2 x s16>) = COPY $s0 %1(s32) = G_BITCAST %0 ... @@ -489,13 +489,13 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:gpr(s64) = COPY %x0 +# CHECK: %0:gpr(s64) = COPY $x0 # CHECK: %1:gpr(s64) = G_BITCAST %0 body: | bb.0: - liveins: %x0 + liveins: $x0 - %0(s64) = COPY %x0 + %0(s64) = COPY $x0 %1(s64) = G_BITCAST %0 ... @@ -507,13 +507,13 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:fpr(<2 x s32>) = COPY %d0 +# CHECK: %0:fpr(<2 x s32>) = COPY $d0 # CHECK: %1:fpr(<2 x s32>) = G_BITCAST %0 body: | bb.0: - liveins: %d0 + liveins: $d0 - %0(<2 x s32>) = COPY %d0 + %0(<2 x s32>) = COPY $d0 %1(<2 x s32>) = G_BITCAST %0 ... @@ -525,14 +525,14 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:gpr(s64) = COPY %x0 +# CHECK: %0:gpr(s64) = COPY $x0 # FAST: %1:fpr(<2 x s32>) = G_BITCAST %0 # GREEDY: %1:gpr(<2 x s32>) = G_BITCAST %0 body: | bb.0: - liveins: %x0 + liveins: $x0 - %0(s64) = COPY %x0 + %0(s64) = COPY $x0 %1(<2 x s32>) = G_BITCAST %0 ... 
@@ -544,14 +544,14 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } # CHECK: body: -# CHECK: %0:fpr(<2 x s32>) = COPY %d0 +# CHECK: %0:fpr(<2 x s32>) = COPY $d0 # FAST: %1:gpr(s64) = G_BITCAST %0 # GREEDY: %1:fpr(s64) = G_BITCAST %0 body: | bb.0: - liveins: %d0 + liveins: $d0 - %0(<2 x s32>) = COPY %d0 + %0(<2 x s32>) = COPY $d0 %1(s64) = G_BITCAST %0 ... @@ -569,13 +569,13 @@ registers: # CHECK: %2:fpr(<2 x s64>) = G_BITCAST %3(s128) body: | bb.1: - liveins: %x0, %x1 - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + liveins: $x0, $x1 + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) %2(<2 x s64>) = G_BITCAST %3(s128) - %q0 = COPY %2(<2 x s64>) - RET_ReallyLR implicit %q0 + $q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit $q0 ... @@ -598,14 +598,14 @@ registers: # CHECK-NEXT: %2:fpr(<2 x s64>) = G_BITCAST %4(s128) body: | bb.1: - liveins: %x0, %x1 - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + liveins: $x0, $x1 + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) %4(s128) = COPY %3(s128) %2(<2 x s64>) = G_BITCAST %4(s128) - %q0 = COPY %2(<2 x s64>) - RET_ReallyLR implicit %q0 + $q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit $q0 ... @@ -630,11 +630,11 @@ registers: # CHECK: %1:fpr(s128) = COPY %0 body: | bb.1: - liveins: %x0 - %0 = LDRQui killed %x0, 0 + liveins: $x0 + %0 = LDRQui killed $x0, 0 %1(s128) = COPY %0 - %q0 = COPY %1(s128) - RET_ReallyLR implicit %q0 + $q0 = COPY %1(s128) + RET_ReallyLR implicit $q0 ... @@ -654,15 +654,15 @@ registers: # CHECK: registers: # CHECK: - { id: 0, class: gpr, preferred-register: '' } # CHECK: - { id: 1, class: gpr, preferred-register: '' } -# CHECK: %0:gpr(s32) = COPY %w0 +# CHECK: %0:gpr(s32) = COPY $w0 # CHECK-NEXT: %1:gpr(s16) = G_TRUNC %0(s32) body: | bb.1: - liveins: %w0 - %0(s32) = COPY %w0 + liveins: $w0 + %0(s32) = COPY $w0 %1(s16) = G_TRUNC %0(s32) - %h0 = COPY %1(s16) - RET_ReallyLR implicit %h0 + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 ... 
@@ -682,8 +682,8 @@ registers: - { id: 4, class: _ } - { id: 5, class: _ } # No repairing should be necessary for both modes. -# CHECK: %0:gpr(s64) = COPY %x0 -# CHECK-NEXT: %1:gpr(p0) = COPY %x1 +# CHECK: %0:gpr(s64) = COPY $x0 +# CHECK-NEXT: %1:gpr(p0) = COPY $x1 # FAST-NEXT: %2:fpr(<2 x s32>) = G_BITCAST %0(s64) # FAST-NEXT: %3:fpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) # FAST-NEXT: %4:fpr(<2 x s32>) = G_OR %2, %3 @@ -691,20 +691,20 @@ registers: # GREEDY-NEXT: %3:gpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) # GREEDY-NEXT: %4:gpr(<2 x s32>) = G_OR %2, %3 # CHECK-NEXT: %5:gpr(s64) = G_BITCAST %4(<2 x s32>) -# CHECK-NEXT: %x0 = COPY %5(s64) -# CHECK-NEXT: RET_ReallyLR implicit %x0 +# CHECK-NEXT: $x0 = COPY %5(s64) +# CHECK-NEXT: RET_ReallyLR implicit $x0 body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - %0(s64) = COPY %x0 - %1(p0) = COPY %x1 + %0(s64) = COPY $x0 + %1(p0) = COPY $x1 %2(<2 x s32>) = G_BITCAST %0(s64) %3(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) %4(<2 x s32>) = G_OR %2, %3 %5(s64) = G_BITCAST %4(<2 x s32>) - %x0 = COPY %5(s64) - RET_ReallyLR implicit %x0 + $x0 = COPY %5(s64) + RET_ReallyLR implicit $x0 ... @@ -728,25 +728,25 @@ registers: - { id: 3, class: _ } # No repairing should be necessary for both modes. -# CHECK: %0:gpr(s64) = COPY %x0 -# CHECK-NEXT: %1:gpr(p0) = COPY %x1 +# CHECK: %0:gpr(s64) = COPY $x0 +# CHECK-NEXT: %1:gpr(p0) = COPY $x1 # CHECK-NEXT: %2:fpr(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) # %0 has been mapped to GPR, we need to repair to match FPR. 
# CHECK-NEXT: %4:fpr(s64) = COPY %0 # CHECK-NEXT: %3:fpr(s64) = G_FADD %4, %2 -# CHECK-NEXT: %x0 = COPY %3(s64) -# CHECK-NEXT: RET_ReallyLR implicit %x0 +# CHECK-NEXT: $x0 = COPY %3(s64) +# CHECK-NEXT: RET_ReallyLR implicit $x0 body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - %0(s64) = COPY %x0 - %1(p0) = COPY %x1 + %0(s64) = COPY $x0 + %1(p0) = COPY $x1 %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) %3(s64) = G_FADD %0, %2 - %x0 = COPY %3(s64) - RET_ReallyLR implicit %x0 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 ... @@ -768,8 +768,8 @@ registers: - { id: 1, class: _ } - { id: 2, class: _ } -# CHECK: %0:gpr(s64) = COPY %x0 -# CHECK-NEXT: %1:gpr(p0) = COPY %x1 +# CHECK: %0:gpr(s64) = COPY $x0 +# CHECK-NEXT: %1:gpr(p0) = COPY $x1 # %0 has been mapped to GPR, we need to repair to match FPR. # CHECK-NEXT: %3:fpr(s64) = COPY %0 # CHECK-NEXT: %4:fpr(s64) = COPY %0 @@ -779,10 +779,10 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - %0(s64) = COPY %x0 - %1(p0) = COPY %x1 + %0(s64) = COPY $x0 + %1(p0) = COPY $x1 %2(s64) = G_FADD %0, %0 G_STORE %2(s64), %1(p0) :: (store 8 into %ir.addr) RET_ReallyLR @@ -804,23 +804,23 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } -# CHECK: %1:gpr(s32) = COPY %w0 +# CHECK: %1:gpr(s32) = COPY $w0 # CHECK-NEXT: %0:gpr(s16) = G_TRUNC %1 # %0 has been mapped to GPR, we need to repair to match FPR. # CHECK-NEXT: %3:fpr(s16) = COPY %0 # CHECK-NEXT: %2:fpr(s32) = G_FPEXT %3 -# CHECK-NEXT: %s0 = COPY %2 +# CHECK-NEXT: $s0 = COPY %2 # CHECK-NEXT: RET_ReallyLR body: | bb.1: - liveins: %w0 + liveins: $w0 - %1(s32) = COPY %w0 + %1(s32) = COPY $w0 %0(s16) = G_TRUNC %1(s32) %2(s32) = G_FPEXT %0(s16) - %s0 = COPY %2(s32) - RET_ReallyLR implicit %s0 + $s0 = COPY %2(s32) + RET_ReallyLR implicit $s0 ... 
@@ -839,23 +839,23 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } -# CHECK: %1:gpr(s32) = COPY %w0 +# CHECK: %1:gpr(s32) = COPY $w0 # CHECK-NEXT: %0:gpr(s16) = G_TRUNC %1 # %0 has been mapped to GPR, we need to repair to match FPR. # CHECK-NEXT: %3:fpr(s16) = COPY %0 # CHECK-NEXT: %2:fpr(s64) = G_FPEXT %3 -# CHECK-NEXT: %d0 = COPY %2 +# CHECK-NEXT: $d0 = COPY %2 # CHECK-NEXT: RET_ReallyLR body: | bb.1: - liveins: %w0 + liveins: $w0 - %1(s32) = COPY %w0 + %1(s32) = COPY $w0 %0(s16) = G_TRUNC %1(s32) %2(s64) = G_FPEXT %0(s16) - %d0 = COPY %2(s64) - RET_ReallyLR implicit %d0 + $d0 = COPY %2(s64) + RET_ReallyLR implicit $d0 ... @@ -872,20 +872,20 @@ legalized: true registers: - { id: 0, class: _ } - { id: 1, class: _ } -# CHECK: %0:gpr(s32) = COPY %w0 +# CHECK: %0:gpr(s32) = COPY $w0 # %0 has been mapped to GPR, we need to repair to match FPR. # CHECK-NEXT: %2:fpr(s32) = COPY %0 # CHECK-NEXT: %1:fpr(s64) = G_FPEXT %2 -# CHECK-NEXT: %d0 = COPY %1 +# CHECK-NEXT: $d0 = COPY %1 # CHECK-NEXT: RET_ReallyLR body: | bb.1: - liveins: %w0 + liveins: $w0 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s64) = G_FPEXT %0(s32) - %d0 = COPY %1(s64) - RET_ReallyLR implicit %d0 + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 ... @@ -894,8 +894,8 @@ body: | # CHECK-LABEL: name: passFp16 # CHECK: registers: # CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: %0:fpr(s16) = COPY %h0 -# CHECK-NEXT: %h0 = COPY %0(s16) +# CHECK: %0:fpr(s16) = COPY $h0 +# CHECK-NEXT: $h0 = COPY %0(s16) name: passFp16 alignment: 2 legalized: true @@ -903,11 +903,11 @@ registers: - { id: 0, class: _ } body: | bb.1.entry: - liveins: %h0 + liveins: $h0 - %0(s16) = COPY %h0 - %h0 = COPY %0(s16) - RET_ReallyLR implicit %h0 + %0(s16) = COPY $h0 + $h0 = COPY %0(s16) + RET_ReallyLR implicit $h0 ... 
--- @@ -919,7 +919,7 @@ body: | # CHECK: - { id: 1, class: gpr, preferred-register: '' } # CHECK: - { id: 2, class: fpr, preferred-register: '' } # -# CHECK: %0:fpr(s16) = COPY %h0 +# CHECK: %0:fpr(s16) = COPY $h0 # CHECK-NEXT: %1:gpr(p0) = G_FRAME_INDEX %stack.0.p.addr # If we didn't look through the copy for %0, the default mapping # would have been on GPR and we would have to insert a copy to move @@ -929,7 +929,7 @@ body: | # would have been on GPR and we would have to insert a copy to move # the value to FPR (h0). # CHECK-NEXT: %2:fpr(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr) -# CHECK-NEXT: %h0 = COPY %2(s16) +# CHECK-NEXT: $h0 = COPY %2(s16) name: passFp16ViaAllocas alignment: 2 legalized: true @@ -944,13 +944,13 @@ stack: - { id: 0, name: p.addr, size: 2, alignment: 2, stack-id: 0 } body: | bb.1.entry: - liveins: %h0 + liveins: $h0 - %0(s16) = COPY %h0 + %0(s16) = COPY $h0 %1(p0) = G_FRAME_INDEX %stack.0.p.addr G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p.addr) %2(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr) - %h0 = COPY %2(s16) - RET_ReallyLR implicit %h0 + $h0 = COPY %2(s16) + RET_ReallyLR implicit $h0 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll index 4b6fab704da1..1cb2e9aa1860 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -11,7 +11,7 @@ ; CHECK: [[RHS:%[0-9]+]]:_(s8) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) ; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[LHS]], [[RHS]] ; CHECK: [[SUM32:%[0-9]+]]:_(s32) = G_SEXT [[SUM]](s8) -; CHECK: %w0 = COPY [[SUM32]](s32) +; CHECK: $w0 = COPY [[SUM32]](s32) define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) { %sum = add i8 %lhs, %rhs ret i8 %sum @@ -20,11 +20,11 @@ define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) ; CHECK-LABEL: name: test_call_stack ; CHECK: [[C42:%[0-9]+]]:_(s8) = G_CONSTANT i8 42 ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C42_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C42_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C42_OFFS]](s64) ; CHECK: G_STORE [[C42]](s8), [[C42_LOC]](p0) :: (store 1 into stack, align 0) -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C12_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[C12_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C12_OFFS]](s64) ; CHECK: G_STORE [[C12]](s8), [[C12_LOC]](p0) :: (store 1 into stack + 1, align 0) @@ -35,9 +35,9 @@ define void @test_call_stack() { } ; CHECK-LABEL: name: test_128bit_struct -; CHECK: %x0 = COPY -; CHECK: %x1 = COPY -; CHECK: %x2 = COPY +; CHECK: $x0 = COPY +; CHECK: $x1 = COPY +; CHECK: $x2 = COPY ; CHECK: BL @take_128bit_struct define void @test_128bit_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr @@ -46,28 +46,34 @@ define void @test_128bit_struct([2 x i64]* %ptr) { } ; CHECK-LABEL: name: take_128bit_struct 
-; CHECK: {{%.*}}:_(p0) = COPY %x0 -; CHECK: {{%.*}}:_(s64) = COPY %x1 -; CHECK: {{%.*}}:_(s64) = COPY %x2 +; CHECK: {{%.*}}:_(p0) = COPY $x0 +; CHECK: {{%.*}}:_(s64) = COPY $x1 +; CHECK: {{%.*}}:_(s64) = COPY $x2 define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { store [2 x i64] %in, [2 x i64]* %ptr ret void } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0) -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0 -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP %0, [[CST]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.ptr + 8) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s128) = G_INSERT [[INS1]], [[LD2]](s64), 64 +; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 0 +; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 64 -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]](s64) +; CHECK: G_STORE [[EXT1]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) +; CHECK: G_STORE [[EXT2]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* 
%ptr call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll index 23a39a336fa3..0902e29e4a1c 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s ; CHECK-LABEL: name: test_trivial_call -; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def %sp, implicit %sp -; CHECK: BL @trivial_callee, csr_aarch64_aapcs, implicit-def %lr -; CHECK: ADJCALLSTACKUP 0, 0, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp +; CHECK: BL @trivial_callee, csr_aarch64_aapcs, implicit-def $lr +; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp declare void @trivial_callee() define void @test_trivial_call() { call void @trivial_callee() @@ -11,10 +11,10 @@ define void @test_trivial_call() { } ; CHECK-LABEL: name: test_simple_return -; CHECK: BL @simple_return_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit-def %x0 -; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK: %x0 = COPY [[RES]] -; CHECK: RET_ReallyLR implicit %x0 +; CHECK: BL @simple_return_callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: $x0 = COPY [[RES]] +; CHECK: RET_ReallyLR implicit $x0 declare i64 @simple_return_callee() define i64 @test_simple_return() { %res = call i64 @simple_return_callee() @@ -22,9 +22,9 @@ define i64 @test_simple_return() { } ; CHECK-LABEL: name: test_simple_arg -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 -; CHECK: %w0 = COPY [[IN]] -; CHECK: BL @simple_arg_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w0 +; CHECK: $w0 = COPY [[IN]] +; CHECK: BL 
@simple_arg_callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 ; CHECK: RET_ReallyLR declare void @simple_arg_callee(i32 %in) define void @test_simple_arg(i32 %in) { @@ -36,8 +36,8 @@ define void @test_simple_arg(i32 %in) { ; CHECK: registers: ; Make sure the register feeding the indirect call is properly constrained. ; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' } -; CHECK: %[[FUNC]]:gpr64(p0) = COPY %x0 -; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp +; CHECK: %[[FUNC]]:gpr64(p0) = COPY $x0 +; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def $lr, implicit $sp ; CHECK: RET_ReallyLR define void @test_indirect_call(void()* %func) { call void %func() @@ -45,11 +45,11 @@ define void @test_indirect_call(void()* %func) { } ; CHECK-LABEL: name: test_multiple_args -; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 -; CHECK: %w0 = COPY [[ANSWER]] -; CHECK: %x1 = COPY [[IN]] -; CHECK: BL @multiple_args_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit %x1 +; CHECK: $w0 = COPY [[ANSWER]] +; CHECK: $x1 = COPY [[IN]] +; CHECK: BL @multiple_args_callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $x1 ; CHECK: RET_ReallyLR declare void @multiple_args_callee(i32, i64) define void @test_multiple_args(i64 %in) { @@ -59,19 +59,27 @@ define void @test_multiple_args(i64 %in) { ; CHECK-LABEL: name: test_struct_formal -; CHECK: [[DBL:%[0-9]+]]:_(s64) = COPY %d0 -; CHECK: [[I64:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK: [[I8_C:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[DBL:%[0-9]+]]:_(s64) = COPY $d0 +; CHECK: [[I64:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[I8_C:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[I8:%[0-9]+]]:_(s8) = G_TRUNC [[I8_C]] -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[UNDEF:%[0-9]+]]:_(s192) = 
G_IMPLICIT_DEF ; CHECK: [[ARG0:%[0-9]+]]:_(s192) = G_INSERT [[UNDEF]], [[DBL]](s64), 0 ; CHECK: [[ARG1:%[0-9]+]]:_(s192) = G_INSERT [[ARG0]], [[I64]](s64), 64 ; CHECK: [[ARG2:%[0-9]+]]:_(s192) = G_INSERT [[ARG1]], [[I8]](s8), 128 ; CHECK: [[ARG:%[0-9]+]]:_(s192) = COPY [[ARG2]] - -; CHECK: G_STORE [[ARG]](s192), [[ADDR]](p0) +; CHECK: [[EXTA0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 0 +; CHECK: [[EXTA1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 64 +; CHECK: [[EXTA2:%[0-9]+]]:_(s8) = G_EXTRACT [[ARG]](s192), 128 +; CHECK: G_STORE [[EXTA0]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64) +; CHECK: G_STORE [[EXTA1]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64) +; CHECK: G_STORE [[EXTA2]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8) ; CHECK: RET_ReallyLR define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) { store {double, i64, i8} %in, {double, i64, i8}* %addr @@ -80,17 +88,28 @@ define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) ; CHECK-LABEL: name: test_struct_return -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_LOAD [[ADDR]](p0) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 + +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 16, align 8) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF +; 
CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[LD2]](s64), 64 +; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[LD3]](s32), 128 ; CHECK: [[DBL:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 0 ; CHECK: [[I64:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 64 ; CHECK: [[I32:%[0-9]+]]:_(s32) = G_EXTRACT [[VAL]](s192), 128 -; CHECK: %d0 = COPY [[DBL]](s64) -; CHECK: %x0 = COPY [[I64]](s64) -; CHECK: %w1 = COPY [[I32]](s32) -; CHECK: RET_ReallyLR implicit %d0, implicit %x0, implicit %w1 +; CHECK: $d0 = COPY [[DBL]](s64) +; CHECK: $x0 = COPY [[I64]](s64) +; CHECK: $w1 = COPY [[I32]](s32) +; CHECK: RET_ReallyLR implicit $d0, implicit $x0, implicit $w1 define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) { %val = load {double, i64, i32}, {double, i64, i32}* %addr ret {double, i64, i32} %val @@ -98,22 +117,36 @@ define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) { ; CHECK-LABEL: name: test_arr_call ; CHECK: hasCalls: true -; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_LOAD - +; CHECK: %0:_(p0) = COPY $x0 +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr) +; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[CST1]](s64) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP %0, [[CST2]](s64) +; CHECK: [[LD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP2]](p0) :: (load 8 from %ir.addr + 16) +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP %0, [[CST3]](s64) +; CHECK: [[LD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP3]](p0) :: (load 8 from %ir.addr + 24) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s256) = G_INSERT [[INS1]], [[LD2]](s64), 64 +; 
CHECK: [[INS3:%[0-9]+]]:_(s256) = G_INSERT [[INS2]], [[LD3]](s64), 128 +; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_INSERT [[INS3]], [[LD4]](s64), 192 ; CHECK: [[E0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 0 ; CHECK: [[E1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 64 ; CHECK: [[E2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 128 ; CHECK: [[E3:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 192 -; CHECK: %x0 = COPY [[E0]](s64) -; CHECK: %x1 = COPY [[E1]](s64) -; CHECK: %x2 = COPY [[E2]](s64) -; CHECK: %x3 = COPY [[E3]](s64) -; CHECK: BL @arr_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2, implicit %x3, implicit-def %x0, implicit-def %x1, implicit-def %x2, implicit-def %x3 -; CHECK: [[E0:%[0-9]+]]:_(s64) = COPY %x0 -; CHECK: [[E1:%[0-9]+]]:_(s64) = COPY %x1 -; CHECK: [[E2:%[0-9]+]]:_(s64) = COPY %x2 -; CHECK: [[E3:%[0-9]+]]:_(s64) = COPY %x3 +; CHECK: $x0 = COPY [[E0]](s64) +; CHECK: $x1 = COPY [[E1]](s64) +; CHECK: $x2 = COPY [[E2]](s64) +; CHECK: $x3 = COPY [[E3]](s64) +; CHECK: BL @arr_callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1, implicit-def $x2, implicit-def $x3 +; CHECK: [[E0:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[E1:%[0-9]+]]:_(s64) = COPY $x1 +; CHECK: [[E2:%[0-9]+]]:_(s64) = COPY $x2 +; CHECK: [[E3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK: [[RES:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[E0]](s64), [[E1]](s64), [[E2]](s64), [[E3]](s64) ; CHECK: G_EXTRACT [[RES]](s256), 64 declare [4 x i64] @arr_callee([4 x i64]) @@ -128,14 +161,14 @@ define i64 @test_arr_call([4 x i64]* %addr) { ; CHECK-LABEL: name: test_abi_exts_call ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD ; CHECK: [[VAL_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[VAL]] -; CHECK: %w0 = COPY [[VAL_TMP]] -; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 +; CHECK: $w0 = COPY [[VAL_TMP]] +; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def 
$lr, implicit $sp, implicit $w0 ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) -; CHECK: %w0 = COPY [[SVAL]](s32) -; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 +; CHECK: $w0 = COPY [[SVAL]](s32) +; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 ; CHECK: [[ZVAL:%[0-9]+]]:_(s32) = G_ZEXT [[VAL]](s8) -; CHECK: %w0 = COPY [[ZVAL]](s32) -; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 +; CHECK: $w0 = COPY [[ZVAL]](s32) +; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 declare void @take_char(i8) define void @test_abi_exts_call(i8* %addr) { %val = load i8, i8* %addr @@ -148,8 +181,8 @@ define void @test_abi_exts_call(i8* %addr) { ; CHECK-LABEL: name: test_abi_sext_ret ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) -; CHECK: %w0 = COPY [[SVAL]](s32) -; CHECK: RET_ReallyLR implicit %w0 +; CHECK: $w0 = COPY [[SVAL]](s32) +; CHECK: RET_ReallyLR implicit $w0 define signext i8 @test_abi_sext_ret(i8* %addr) { %val = load i8, i8* %addr ret i8 %val @@ -158,8 +191,8 @@ define signext i8 @test_abi_sext_ret(i8* %addr) { ; CHECK-LABEL: name: test_abi_zext_ret ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_ZEXT [[VAL]](s8) -; CHECK: %w0 = COPY [[SVAL]](s32) -; CHECK: RET_ReallyLR implicit %w0 +; CHECK: $w0 = COPY [[SVAL]](s32) +; CHECK: RET_ReallyLR implicit $w0 define zeroext i8 @test_abi_zext_ret(i8* %addr) { %val = load i8, i8* %addr ret i8 %val @@ -187,22 +220,23 @@ define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { ; CHECK-LABEL: name: test_call_stack ; CHECK: [[C42:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 -; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 -; CHECK: ADJCALLSTACKDOWN 24, 0, implicit-def %sp, implicit %sp -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; 
CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZERO]] +; CHECK: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C42_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C42_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C42_OFFS]](s64) ; CHECK: G_STORE [[C42]](s64), [[C42_LOC]](p0) :: (store 8 into stack, align 0) -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C12_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[C12_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C12_OFFS]](s64) ; CHECK: G_STORE [[C12]](s64), [[C12_LOC]](p0) :: (store 8 into stack + 8, align 0) -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[PTR_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[PTR_OFFS]](s64) ; CHECK: G_STORE [[PTR]](p0), [[PTR_LOC]](p0) :: (store 8 into stack + 16, align 0) ; CHECK: BL @test_stack_slots -; CHECK: ADJCALLSTACKUP 24, 0, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp define void @test_call_stack() { call void @test_stack_slots([8 x i64] undef, i64 42, i64 12, i64* null) ret void @@ -219,9 +253,9 @@ define void @test_mem_i1([8 x i64], i1 %in) { } ; CHECK-LABEL: name: test_128bit_struct -; CHECK: %x0 = COPY -; CHECK: %x1 = COPY -; CHECK: %x2 = COPY +; CHECK: $x0 = COPY +; CHECK: $x1 = COPY +; CHECK: $x2 = COPY ; CHECK: BL @take_128bit_struct define void @test_128bit_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr @@ -230,28 +264,35 @@ define void @test_128bit_struct([2 x i64]* %ptr) { } ; CHECK-LABEL: name: take_128bit_struct -; CHECK: {{%.*}}:_(p0) = COPY %x0 -; CHECK: {{%.*}}:_(s64) = COPY %x1 -; CHECK: {{%.*}}:_(s64) = COPY %x2 +; CHECK: {{%.*}}:_(p0) = COPY $x0 +; CHECK: {{%.*}}:_(s64) = COPY $x1 +; CHECK: {{%.*}}:_(s64) = COPY $x2 define void @take_128bit_struct([2 x 
i64]* %ptr, [2 x i64] %in) { store [2 x i64] %in, [2 x i64]* %ptr ret void } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0) -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0 -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST]](s64) +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8 from %ir.ptr + 8) -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp -; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) +; CHECK: [[IMPDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF]], [[LO]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s128) = G_INSERT [[INS1]], [[HI]](s64), 64 +; CHECK: [[EXTLO:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 0 +; CHECK: [[EXTHI:%[0-9]+]]:_(s64) = G_EXTRACT [[INS2]](s128), 64 -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp -; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] -; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp +; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[CST2]](s64) +; CHECK: G_STORE [[EXTLO]](s64), [[GEP2]](p0) :: (store 8 into stack, align 0) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp +; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[CST3]](s64) +; CHECK: G_STORE [[EXTHI]](s64), [[GEP3]](p0) :: (store 8 into stack + 8, align 0) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr call void 
@take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, diff --git a/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir b/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir index 339adf51451b..1671a2692bc9 100644 --- a/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir +++ b/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll index be510b5f7e3b..8309e00e2fcb 100644 --- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll +++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll @@ -5,8 +5,8 @@ ; CHECK: stack: ; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, ; CHECK-NEXT: callee-saved-register: '', callee-saved-restored: true, -; CHECK-NEXT: di-variable: '!11', di-expression: '!DIExpression()', -; CHECK: DBG_VALUE debug-use %0(s32), debug-use %noreg, !11, !DIExpression(), debug-location !12 +; CHECK-NEXT: debug-info-variable: '!11', debug-info-expression: '!DIExpression()', +; CHECK: DBG_VALUE debug-use %0(s32), debug-use $noreg, !11, !DIExpression(), debug-location !12 define void @debug_declare(i32 %in) #0 !dbg !7 { entry: %in.addr = alloca i32, align 4 @@ -17,7 +17,7 @@ entry: } ; CHECK-LABEL: name: debug_declare_vla -; CHECK: DBG_VALUE debug-use %{{[0-9]+}}(p0), debug-use %noreg, !14, !DIExpression(), debug-location !15 +; CHECK: DBG_VALUE debug-use %{{[0-9]+}}(p0), debug-use $noreg, !14, !DIExpression(), debug-location !15 define void @debug_declare_vla(i32 %in) #0 !dbg !13 { entry: %vla.addr = alloca i32, i32 %in @@ -26,19 +26,19 @@ entry: } ; CHECK-LABEL: name: debug_value -; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: 
[[IN:%[0-9]+]]:_(s32) = COPY $w0 define void @debug_value(i32 %in) #0 !dbg !16 { %addr = alloca i32 -; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use %noreg, !17, !DIExpression(), debug-location !18 +; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use $noreg, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32 %in, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 store i32 %in, i32* %addr -; CHECK: DBG_VALUE debug-use %1(p0), debug-use %noreg, !17, !DIExpression(DW_OP_deref), debug-location !18 +; CHECK: DBG_VALUE debug-use %1(p0), debug-use $noreg, !17, !DIExpression(DW_OP_deref), debug-location !18 call void @llvm.dbg.value(metadata i32* %addr, i64 0, metadata !17, metadata !DIExpression(DW_OP_deref)), !dbg !18 ; CHECK: DBG_VALUE 123, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32 123, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ; CHECK: DBG_VALUE float 1.000000e+00, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 -; CHECK: DBG_VALUE %noreg, 0, !17, !DIExpression(), debug-location !18 +; CHECK: DBG_VALUE $noreg, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ret void } @@ -58,15 +58,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) !4 = !{i32 2, !"Debug Info Version", i32 3} !5 = !{i32 1, !"PIC Level", i32 2} !6 = !{!"clang version 4.0.0 (trunk 289075) (llvm/trunk 289080)"} -!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: 
!0, retainedNodes: !2) !8 = !DISubroutineType(types: !9) !9 = !{null, !10} !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !11 = !DILocalVariable(name: "in", arg: 1, scope: !7, file: !1, line: 1, type: !10) !12 = !DILocation(line: 1, column: 14, scope: !7) -!13 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!13 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !14 = !DILocalVariable(name: "in", arg: 1, scope: !13, file: !1, line: 1, type: !10) !15 = !DILocation(line: 1, column: 14, scope: !13) -!16 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!16 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !17 = !DILocalVariable(name: "in", arg: 1, scope: !16, file: !1, line: 1, type: !10) !18 = !DILocation(line: 1, column: 14, scope: !16) diff --git a/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll index 62aceaa81308..baf0fa9aa444 100644 --- a/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll +++ b/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll @@ -1,47 +1,47 @@ ; RUN: llc -mtriple=aarch64 -global-isel %s -o - -stop-after=irtranslator | FileCheck %s ; CHECK-LABEL: name: test_simple_alloca -; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TYPE_SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 ; CHECK: [[NUMELTS_64:%[0-9]+]]:_(s64) = G_ZEXT 
[[NUMELTS]](s32) ; CHECK: [[NUMBYTES:%[0-9]+]]:_(s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] -; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] ; CHECK: [[ALIGNED_ALLOC:%[0-9]+]]:_(p0) = G_PTR_MASK [[ALLOC]], 4 -; CHECK: %sp = COPY [[ALIGNED_ALLOC]] +; CHECK: $sp = COPY [[ALIGNED_ALLOC]] ; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = COPY [[ALIGNED_ALLOC]] -; CHECK: %x0 = COPY [[ALLOC]] +; CHECK: $x0 = COPY [[ALLOC]] define i8* @test_simple_alloca(i32 %numelts) { %addr = alloca i8, i32 %numelts ret i8* %addr } ; CHECK-LABEL: name: test_aligned_alloca -; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TYPE_SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 ; CHECK: [[NUMELTS_64:%[0-9]+]]:_(s64) = G_ZEXT [[NUMELTS]](s32) ; CHECK: [[NUMBYTES:%[0-9]+]]:_(s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] -; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] ; CHECK: [[ALIGNED_ALLOC:%[0-9]+]]:_(p0) = G_PTR_MASK [[ALLOC]], 5 -; CHECK: %sp = COPY [[ALIGNED_ALLOC]] +; CHECK: $sp = COPY [[ALIGNED_ALLOC]] ; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = COPY [[ALIGNED_ALLOC]] -; CHECK: %x0 = COPY [[ALLOC]] +; CHECK: $x0 = COPY [[ALLOC]] define i8* @test_aligned_alloca(i32 %numelts) { %addr = alloca i8, i32 %numelts, align 32 ret i8* %addr } ; CHECK-LABEL: name: test_natural_alloca -; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[TYPE_SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 ; CHECK: [[NUMELTS_64:%[0-9]+]]:_(s64) = G_ZEXT [[NUMELTS]](s32) ; CHECK: [[NUMBYTES:%[0-9]+]]:_(s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] -; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] -; CHECK: %sp = COPY [[ALLOC]] 
+; CHECK: $sp = COPY [[ALLOC]] ; CHECK: [[ALLOC_TMP:%[0-9]+]]:_(p0) = COPY [[ALLOC]] -; CHECK: %x0 = COPY [[ALLOC_TMP]] +; CHECK: $x0 = COPY [[ALLOC_TMP]] define i128* @test_natural_alloca(i32 %numelts) { %addr = alloca i128, i32 %numelts ret i128* %addr diff --git a/test/CodeGen/AArch64/GlobalISel/fallback-nofastisel.ll b/test/CodeGen/AArch64/GlobalISel/fallback-nofastisel.ll new file mode 100644 index 000000000000..faaa725486f1 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/fallback-nofastisel.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=aarch64_be-- %s -o /dev/null -debug-only=isel -O0 2>&1 | FileCheck %s +; REQUIRES: asserts + +; This test uses big endian in order to force an abort since it's not currently supported for GISel. +; The purpose is to check that we don't fall back to FastISel. Checking the pass structure is insufficient +; because the FastISel is set up in the SelectionDAGISel, so it doesn't appear on the pass structure. + +; CHECK-NOT: Enabling fast-ise +define void @empty() { + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir b/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir index 47fda8f998d7..c2ed472fcdd6 100644 --- a/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir +++ b/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" @@ -25,20 +25,21 @@ fixedStack: stack: - { id: 0, name: a.addr, type: default, offset: 0, size: 16, alignment: 16, stack-id: 0, callee-saved-register: '', callee-saved-restored: true, - di-variable: '', di-expression: '', di-location: '' } + debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } body: | bb.1.entry: - liveins: 
%q0 + liveins: $q0 ; This test just checks we don't crash on G_FNEG of FP128 types. Expect to fall ; back until support is added for fp128. ; CHECK: ret - %0:_(s128) = COPY %q0 + %0:_(s128) = COPY $q0 %1:_(p0) = G_FRAME_INDEX %stack.0.a.addr G_STORE %0(s128), %1(p0) :: (store 16 into %ir.a.addr) %2:_(s128) = G_LOAD %1(p0) :: (load 16 from %ir.a.addr) %3:_(s128) = G_FNEG %2 - %q0 = COPY %3(s128) - RET_ReallyLR implicit %q0 + $q0 = COPY %3(s128) + RET_ReallyLR implicit $q0 ... diff --git a/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir b/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir new file mode 100644 index 000000000000..78b989f0556b --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s + +# PR36345 +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-arm-none-eabi" + + ; Function Attrs: noinline nounwind optnone + define void @fp16_to_gpr([2 x half], [2 x half]* %addr) { + ret void + } + + define void @gpr_to_fp16() { + ret void + } + + define void @gpr_to_fp16_physreg() { + ret void + } +... 
+--- +name: fp16_to_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } +body: | + bb.1 (%ir-block.1): + liveins: $h0, $h1, $x0 + + ; CHECK-LABEL: name: fp16_to_gpr + ; CHECK: liveins: $h0, $h1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]] + ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY2]], 0, 15 + ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]] + ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15 + ; CHECK: [[COPY4:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: STRWui [[BFMWri1]], [[COPY4]], 0 :: (store 4 into %ir.addr, align 2) + ; CHECK: RET_ReallyLR + %1:fpr(s16) = COPY $h0 + %2:fpr(s16) = COPY $h1 + %3:gpr(s32) = G_IMPLICIT_DEF + %11:gpr(s16) = COPY %1(s16) + %4:gpr(s32) = G_INSERT %3, %11(s16), 0 + %12:gpr(s16) = COPY %2(s16) + %5:gpr(s32) = G_INSERT %4, %12(s16), 16 + %0:gpr(s32) = COPY %5(s32) + %6:gpr(p0) = COPY $x0 + G_STORE %0(s32), %6(p0) :: (store 4 into %ir.addr, align 2) + RET_ReallyLR + +... 
+ +--- +name: gpr_to_fp16 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: fpr } +body: | + bb.1 (%ir-block.0): + liveins: $w0 + + ; CHECK-LABEL: name: gpr_to_fp16 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub + ; CHECK: $h0 = COPY [[COPY2]] + ; CHECK: RET_ReallyLR implicit $h0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s16) = G_TRUNC %0(s32) + %2:fpr(s16) = COPY %1(s16) + $h0 = COPY %2(s16) + RET_ReallyLR implicit $h0 + +... +--- +name: gpr_to_fp16_physreg +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } +body: | + bb.1 (%ir-block.0): + liveins: $w0 + + ; CHECK-LABEL: name: gpr_to_fp16_physreg + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub + ; CHECK: $h0 = COPY [[COPY2]] + ; CHECK: RET_ReallyLR implicit $h0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s16) = G_TRUNC %0(s32) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll index 0972840de47b..3920e1d99c28 100644 --- a/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -1,5 +1,8 @@ ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ -; RUN: -O0 -aarch64-enable-global-isel-at-O=0 \ +; RUN: -O0 | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O0 --check-prefix FALLBACK + +; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ +; RUN: -O0 -aarch64-enable-global-isel-at-O=0 -global-isel-abort=1 \ ; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O0 --check-prefix NOFALLBACK ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ @@ -29,6 +32,9 @@ ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ ; RUN: | FileCheck %s --check-prefix DISABLED +; RUN: llc -mtriple=aarch64-- -fast-isel=0 -global-isel=false \ +; RUN: -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix DISABLED + ; ENABLED: IRTranslator ; ENABLED-NEXT: Legalizer ; ENABLED-NEXT: RegBankSelect diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll index 70dddeb45859..e2b05a922960 100644 --- a/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll @@ -24,7 +24,7 @@ define i32 @test_bitcast_invalid_vreg() { ; At this point we mapped 46 values. The 'i32 100' constant will grow the map. 
; CHECK: %46:_(s32) = G_CONSTANT i32 100 -; CHECK: %w0 = COPY %46(s32) +; CHECK: $w0 = COPY %46(s32) %res = bitcast i32 100 to i32 ret i32 %res } diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll index 827fdd261082..6b8827f095d4 100644 --- a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -11,24 +11,24 @@ declare i32 @llvm.eh.typeid.for(i8*) ; CHECK-NEXT: bb.1 (%ir-block.0): ; CHECK: successors: %[[GOOD:bb.[0-9]+]]{{.*}}%[[BAD:bb.[0-9]+]] ; CHECK: EH_LABEL -; CHECK: %w0 = COPY -; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit-def %w0 -; CHECK: {{%[0-9]+}}:_(s32) = COPY %w0 +; CHECK: $w0 = COPY +; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0 +; CHECK: {{%[0-9]+}}:_(s32) = COPY $w0 ; CHECK: EH_LABEL ; CHECK: G_BR %[[GOOD]] ; CHECK: [[BAD]].{{[a-z]+}} (landing-pad): ; CHECK: EH_LABEL +; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] ; CHECK: [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF -; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY %x0 ; CHECK: [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0 -; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] ; CHECK: [[PTR_SEL:%[0-9]+]]:_(s128) = G_INSERT [[VAL_WITH_PTR]], [[SEL]](s32), 64 ; CHECK: [[PTR_RET:%[0-9]+]]:_(s64) = G_EXTRACT [[PTR_SEL]](s128), 0 ; CHECK: [[SEL_RET:%[0-9]+]]:_(s32) = G_EXTRACT [[PTR_SEL]](s128), 64 -; CHECK: %x0 = COPY [[PTR_RET]] -; CHECK: %w1 = COPY [[SEL_RET]] +; CHECK: $x0 = COPY [[PTR_RET]] +; CHECK: $w1 = COPY [[SEL_RET]] ; CHECK: [[GOOD]].{{[a-z]+}}: ; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -49,7 +49,7 @@ continue: } ; CHECK-LABEL: name: test_invoke_indirect -; CHECK: 
[[CALLEE:%[0-9]+]]:gpr64(p0) = COPY %x0 +; CHECK: [[CALLEE:%[0-9]+]]:gpr64(p0) = COPY $x0 ; CHECK: BLR [[CALLEE]] define void @test_invoke_indirect(void()* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { invoke void %callee() to label %continue unwind label %broken @@ -64,18 +64,20 @@ continue: ; CHECK-LABEL: name: test_invoke_varargs -; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 +; CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZERO]] ; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.0 -; CHECK: %x0 = COPY [[NULL]] +; CHECK: $x0 = COPY [[NULL]] -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[SLOT:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFFSET]](s64) -; CHECK: G_STORE [[ANSWER]](s32), [[SLOT]] +; CHECK: [[ANSWER_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ANSWER]] +; CHECK: G_STORE [[ANSWER_EXT]](s64), [[SLOT]] -; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[SLOT:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFFSET]](s64) ; CHECK: G_STORE [[ONE]](s32), [[SLOT]] diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll new file mode 100644 index 000000000000..9bda39c9fca7 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 -mtriple=aarch64-apple-ios -o - %s | FileCheck %s + +@g = global i16 0, align 2 +declare void @bar(i32) + +; Check that only one load is generated. 
We fall back to +define hidden void @foo() { +; CHECK-NOT: ldrh +; CHECK: ldrsh + %1 = load volatile i16, i16* @g, align 2 + %2 = sext i16 %1 to i32 + call void @bar(i32 %2) + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir index 99b37d0925b7..4b2d54bcd0d3 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -30,29 +30,29 @@ name: test_scalar_add_big body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_add_big - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %x2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY %x3 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) ; CHECK: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[COPY]], [[COPY2]], [[TRUNC]] ; CHECK: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[COPY1]], [[COPY3]], [[UADDE1]] - ; CHECK: %x0 = COPY [[UADDE]](s64) - ; CHECK: %x1 = COPY [[UADDE2]](s64) - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 - %2:_(s64) = COPY %x2 - %3:_(s64) = COPY %x3 + ; CHECK: $x0 = COPY [[UADDE]](s64) + ; CHECK: $x1 = COPY [[UADDE2]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = COPY $x3 %4:_(s128) = G_MERGE_VALUES %0, %1 %5:_(s128) = G_MERGE_VALUES %2, %3 
%6:_(s128) = G_ADD %4, %5 %7:_(s64), %8:_(s64) = G_UNMERGE_VALUES %6 - %x0 = COPY %7 - %x1 = COPY %8 + $x0 = COPY %7 + $x1 = COPY %8 ... --- @@ -70,7 +70,7 @@ registers: - { id: 9, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_add_big_nonpow2 ; CHECK-NOT: G_MERGE_VALUES ; CHECK-NOT: G_UNMERGE_VALUES @@ -81,71 +81,71 @@ body: | ; CHECK: [[RES_HI:%[0-9]+]]:_(s64), {{%.*}}(s1) = G_UADDE %2, %3, [[CARRY2]] ; CHECK-NOT: G_MERGE_VALUES ; CHECK-NOT: G_UNMERGE_VALUES - ; CHECK: %x0 = COPY [[RES_LO]] - ; CHECK: %x1 = COPY [[RES_MI]] - ; CHECK: %x2 = COPY [[RES_HI]] + ; CHECK: $x0 = COPY [[RES_LO]] + ; CHECK: $x1 = COPY [[RES_MI]] + ; CHECK: $x2 = COPY [[RES_HI]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 - %2(s64) = COPY %x2 - %3(s64) = COPY %x3 + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 + %2(s64) = COPY $x2 + %3(s64) = COPY $x3 %4(s192) = G_MERGE_VALUES %0, %1, %2 %5(s192) = G_MERGE_VALUES %1, %2, %3 %6(s192) = G_ADD %4, %5 %7(s64), %8(s64), %9(s64) = G_UNMERGE_VALUES %6 - %x0 = COPY %7 - %x1 = COPY %8 - %x2 = COPY %9 + $x0 = COPY %7 + $x1 = COPY %8 + $x2 = COPY %9 ... 
--- name: test_scalar_add_small body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_add_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[TRUNC1]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ADD]](s32) - ; CHECK: %x0 = COPY [[ANYEXT]](s64) - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 %2:_(s8) = G_TRUNC %0 %3:_(s8) = G_TRUNC %1 %4:_(s8) = G_ADD %2, %3 %5:_(s64) = G_ANYEXT %4 - %x0 = COPY %5 + $x0 = COPY %5 ... --- name: test_vector_add body: | bb.0.entry: - liveins: %q0, %q1, %q2, %q3 + liveins: $q0, $q1, $q2, $q3 ; CHECK-LABEL: name: test_vector_add - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY %q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY %q1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY %q2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY %q3 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 ; CHECK: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY]], [[COPY2]] ; CHECK: [[ADD1:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY1]], [[COPY3]] - ; CHECK: %q0 = COPY [[ADD]](<2 x s64>) - ; CHECK: %q1 = COPY [[ADD1]](<2 x s64>) - %0:_(<2 x s64>) = COPY %q0 - %1:_(<2 x s64>) = COPY %q1 - %2:_(<2 x s64>) = COPY %q2 - %3:_(<2 x s64>) = COPY %q3 + ; CHECK: $q0 = COPY [[ADD]](<2 x s64>) + ; CHECK: $q1 = COPY [[ADD1]](<2 x s64>) + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = COPY $q1 + %2:_(<2 x s64>) = COPY $q2 + %3:_(<2 x s64>) = COPY $q3 %4:_(<4 x s64>) = 
G_MERGE_VALUES %0, %1 %5:_(<4 x s64>) = G_MERGE_VALUES %2, %3 %6:_(<4 x s64>) = G_ADD %4, %5 %7:_(<2 x s64>), %8:_(<2 x s64>) = G_UNMERGE_VALUES %6 - %q0 = COPY %7 - %q1 = COPY %8 + $q0 = COPY %7 + $q1 = COPY %8 ... --- name: test_vector_add_nonpow2 @@ -162,7 +162,7 @@ registers: - { id: 9, class: _ } body: | bb.0.entry: - liveins: %q0, %q1, %q2, %q3 + liveins: $q0, $q1, $q2, $q3 ; CHECK-LABEL: name: test_vector_add_nonpow2 ; CHECK-NOT: G_EXTRACT ; CHECK-NOT: G_SEQUENCE @@ -171,19 +171,19 @@ body: | ; CHECK: [[RES_HI:%[0-9]+]]:_(<2 x s64>) = G_ADD %2, %3 ; CHECK-NOT: G_EXTRACT ; CHECK-NOT: G_SEQUENCE - ; CHECK: %q0 = COPY [[RES_LO]] - ; CHECK: %q1 = COPY [[RES_MI]] - ; CHECK: %q2 = COPY [[RES_HI]] + ; CHECK: $q0 = COPY [[RES_LO]] + ; CHECK: $q1 = COPY [[RES_MI]] + ; CHECK: $q2 = COPY [[RES_HI]] - %0(<2 x s64>) = COPY %q0 - %1(<2 x s64>) = COPY %q1 - %2(<2 x s64>) = COPY %q2 - %3(<2 x s64>) = COPY %q3 + %0(<2 x s64>) = COPY $q0 + %1(<2 x s64>) = COPY $q1 + %2(<2 x s64>) = COPY $q2 + %3(<2 x s64>) = COPY $q3 %4(<6 x s64>) = G_MERGE_VALUES %0, %1, %2 %5(<6 x s64>) = G_MERGE_VALUES %1, %2, %3 %6(<6 x s64>) = G_ADD %4, %5 %7(<2 x s64>), %8(<2 x s64>), %9(<2 x s64>) = G_UNMERGE_VALUES %6 - %q0 = COPY %7 - %q1 = COPY %8 - %q2 = COPY %9 + $q0 = COPY %7 + $q1 = COPY %8 + $q2 = COPY %9 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/test/CodeGen/AArch64/GlobalISel/legalize-and.mir index b2f24a738be2..fdcf79e55367 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-and.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -22,25 +22,25 @@ registers: - { id: 6, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_and_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: %x0 = COPY [[COPY3]](s64) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[COPY3]](s64) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_AND %2, %3 %6(s32) = G_ANYEXT %4 - %w0 = COPY %6 + $w0 = COPY %6 %5(s64) = G_ANYEXT %2 - %x0 = COPY %5 + $x0 = COPY %5 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir b/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir index b77d5e9a1d6d..71cb9270accb 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -14,72 +14,72 @@ name: cmpxchg_i8 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[CST2:%[0-9]+]]:_(s8) = G_TRUNC [[CST]] ; CHECK: [[RES:%[0-9]+]]:_(s8) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST2]] :: (load store monotonic 1 on %ir.addr) ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] - ; CHECK: %w0 = COPY [[RES2]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[RES2]] + %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 1 %2:_(s8) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 1 on %ir.addr) %3:_(s32) = G_ANYEXT %2 - %w0 = COPY %3(s32) + $w0 = COPY %3(s32) ... 
--- name: cmpxchg_i16 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[CST2:%[0-9]+]]:_(s16) = G_TRUNC [[CST]] ; CHECK: [[RES:%[0-9]+]]:_(s16) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST2]] :: (load store monotonic 2 on %ir.addr) ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] - ; CHECK: %w0 = COPY [[RES2]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[RES2]] + %0:_(p0) = COPY $x0 %1:_(s16) = G_CONSTANT i16 1 %2:_(s16) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 2 on %ir.addr) %3:_(s32) = G_ANYEXT %2 - %w0 = COPY %3(s32) + $w0 = COPY %3(s32) ... --- name: cmpxchg_i32 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST]] :: (load store monotonic 4 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[RES]] + %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 4 on %ir.addr) - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- name: cmpxchg_i64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i64 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST]] :: (load store monotonic 8 on %ir.addr) - ; CHECK: %x0 = COPY [[RES]] - %0:_(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[RES]] + %0:_(p0) = COPY $x0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 8 on %ir.addr) - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir index dba2ba8d6836..ca5646a1c7b2 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -30,13 +30,13 @@ registers: - { id: 14, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_icmp - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY]](s64), [[COPY1]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] @@ -45,27 +45,27 @@ body: | ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) - ; CHECK: %w0 = COPY [[COPY3]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64) ; CHECK: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[INTTOPTR]] ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) - ; CHECK: %w0 = COPY [[COPY4]](s32) - %0(s64) = COPY %x0 - %1(s64) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY4]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x0 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s1) = G_ICMP 
intpred(sge), %0, %1 %11(s32) = G_ANYEXT %4 - %w0 = COPY %11 + $w0 = COPY %11 %8(s1) = G_ICMP intpred(ult), %2, %3 %12(s32) = G_ANYEXT %8 - %w0 = COPY %12 + $w0 = COPY %12 %9(p0) = G_INTTOPTR %0(s64) %10(s1) = G_ICMP intpred(eq), %9(p0), %9(p0) %14(s32) = G_ANYEXT %10 - %w0 = COPY %14 + $w0 = COPY %14 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir index 633033670cc9..2c7788511590 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -13,24 +13,24 @@ name: cmpxchg_i32 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) ; CHECK: [[SRES:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[RES]](s32), [[CMP]] ; CHECK: [[SRES32:%[0-9]+]]:_(s32) = COPY [[SRES]] ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[RES]], [[SRES32]] - ; CHECK: %w0 = COPY [[MUL]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[MUL]] + %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_CONSTANT i32 1 %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic 8 on %ir.addr) %5:_(s32) = G_ANYEXT %4 %6:_(s32) = G_MUL %3, %5 - %w0 = COPY %6(s32) + $w0 = COPY %6(s32) ... 
--- @@ -38,22 +38,22 @@ name: cmpxchg_i64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i64 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) ; CHECK: [[SRES:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[RES]](s64), [[CMP]] ; CHECK: [[SRES64:%[0-9]+]]:_(s64) = G_ANYEXT [[SRES]] ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[RES]], [[SRES64]] - ; CHECK: %x0 = COPY [[MUL]] - %0:_(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[MUL]] + %0:_(p0) = COPY $x0 %1:_(s64) = G_CONSTANT i64 0 %2:_(s64) = G_CONSTANT i64 1 %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic 8 on %ir.addr) %5:_(s64) = G_ANYEXT %4 %6:_(s64) = G_MUL %3, %5 - %x0 = COPY %6(s64) + $x0 = COPY %6(s64) ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir index 898cd12d1180..e8a73b717738 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -14,82 +14,82 @@ name: cmpxchg_i8 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[CMPT:%[0-9]+]]:_(s8) = G_TRUNC [[CMP]] ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 1 ; CHECK: [[CSTT:%[0-9]+]]:_(s8) = G_TRUNC [[CST]] ; CHECK: [[RES:%[0-9]+]]:_(s8) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMPT]], [[CSTT]] :: (load store monotonic 1 on %ir.addr) ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]](s8) - ; CHECK: %w0 = COPY [[RES2]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[RES2]] + %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 0 %2:_(s8) = G_CONSTANT i8 1 %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 1 on %ir.addr) %4:_(s32) = G_ANYEXT %3 - %w0 = COPY %4(s32) + $w0 = COPY %4(s32) ... --- name: cmpxchg_i16 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[CMPT:%[0-9]+]]:_(s16) = G_TRUNC [[CMP]] ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[CSTT:%[0-9]+]]:_(s16) = G_TRUNC [[CST]] ; CHECK: [[RES:%[0-9]+]]:_(s16) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMPT]], [[CSTT]] :: (load store monotonic 2 on %ir.addr) ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]](s16) - ; CHECK: %w0 = COPY [[RES2]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[RES2]] + %0:_(p0) = COPY $x0 %1:_(s16) = G_CONSTANT i16 0 %2:_(s16) = G_CONSTANT i16 1 %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 2 on %ir.addr) %4:_(s32) = G_ANYEXT %3 - %w0 = COPY %4(s32) + $w0 = COPY %4(s32) ... 
--- name: cmpxchg_i32 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 4 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:_(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[RES]] + %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_CONSTANT i32 1 %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 4 on %ir.addr) - %w0 = COPY %3(s32) + $w0 = COPY %3(s32) ... --- name: cmpxchg_i64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i64 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) - ; CHECK: %x0 = COPY [[RES]] - %0:_(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[RES]] + %0:_(p0) = COPY $x0 %1:_(s64) = G_CONSTANT i64 0 %2:_(s64) = G_CONSTANT i64 1 %3:_(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) - %x0 = COPY %3(s64) + $x0 = COPY %3(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir index 9cf0f8fd0e71..3d0bd02a9811 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -8,84 +8,116 @@ define void @test_combines_3() { ret void } define void @test_combines_4() { ret void } define void @test_combines_5() { ret void } + define void @test_combines_6() { ret void } ... --- name: test_combines_2 body: | bb.0: - liveins: %w0 + liveins: $w0 ; Here the types don't match. ; CHECK-LABEL: name: test_combines_2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ADD]](s32) ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[MV]](s64), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s64), 0 - %0:_(s32) = COPY %w0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + %0:_(s32) = COPY $w0 %1:_(s32) = G_ADD %0, %0 %2:_(s64) = G_MERGE_VALUES %0, %1 %3:_(s1) = G_EXTRACT %2, 0 %5:_(s32) = G_ANYEXT %3 - %w0 = COPY %5 - %4:_(s64) = G_EXTRACT %2, 0 - %x0 = COPY %4 + $w0 = COPY %5 + %4:_(s64) = COPY %2 + $x0 = COPY %4 ... 
--- name: test_combines_3 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_combines_3 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s32) = G_ADD %0, %0 %2:_(s64) = G_MERGE_VALUES %0, %1 %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2 %5:_(s32) = G_ADD %3, %4 - %w0 = COPY %5 + $w0 = COPY %5 ... --- name: test_combines_4 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_combines_4 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[COPY1]] - %0:_(s64) = COPY %x0 + %0:_(s64) = COPY $x0 %1:_(s128) = G_MERGE_VALUES %0, %0 %2:_(s64) = G_EXTRACT %1, 0 %3:_(s64) = G_ADD %2, %2 - %w0 = COPY %3 + $x0 = COPY %3 ... --- name: test_combines_5 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_combines_5 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s32) = G_ADD %0, %0 %2:_(s64) = G_MERGE_VALUES %0, %1 - %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2 + %6:_(s64) = COPY %2 + %7:_(s64) = COPY %6 + %8:_(s64) = COPY %7 + %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %8 + %5:_(s32) = G_ADD %3, %4 + $w0 = COPY %5 +... 
+ +--- +name: test_combines_6 +body: | + bb.0: + liveins: $w0 + + ; CHECK-LABEL: name: test_combines_6 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ADD]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] + ; CHECK: $w0 = COPY [[ADD1]](s32) + ; CHECK: $x0 = COPY [[COPY2]](s64) + %0:_(s32) = COPY $w0 + + %1:_(s32) = G_ADD %0, %0 + %2:_(s64) = G_MERGE_VALUES %0, %1 + %6:_(s64) = COPY %2 + %7:_(s64) = COPY %6 + %8:_(s64) = COPY %7 + %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %8 %5:_(s32) = G_ADD %3, %4 - %w0 = COPY %5 + $w0 = COPY %5 + $x0 = COPY %7 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index 4ed84ed79bba..6c20fb701b7c 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -31,34 +31,34 @@ body: | ; CHECK-LABEL: name: test_constant ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: %w0 = COPY [[COPY]](s32) + ; CHECK: $w0 = COPY [[COPY]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) ; CHECK: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK: %w0 = COPY [[C3]](s32) + ; CHECK: $w0 = COPY [[C3]](s32) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: %x0 = COPY [[C4]](s64) + ; CHECK: $x0 = COPY [[C4]](s64) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: %x0 = COPY [[C5]](s64) + ; CHECK: $x0 = COPY [[C5]](s64) %0(s1) = G_CONSTANT i1 0 %6:_(s32) = G_ANYEXT %0 - %w0 = COPY %6 + $w0 = COPY %6 %1(s8) = G_CONSTANT i8 42 %7:_(s32) = G_ANYEXT %1 - %w0 = COPY %7 + $w0 = COPY %7 %2(s16) = G_CONSTANT i16 65535 %8:_(s32) = G_ANYEXT %2 - %w0 = COPY %8 + $w0 = COPY %8 %3(s32) = G_CONSTANT i32 -1 - %w0 = COPY %3 + $w0 = COPY %3 %4(s64) = G_CONSTANT i64 1 - %x0 = COPY %4 + $x0 = COPY %4 %5(s64) = G_CONSTANT i64 0 - %x0 = COPY %5 + $x0 = COPY %5 ... --- @@ -72,20 +72,20 @@ body: | ; CHECK-LABEL: name: test_fconstant ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK: %w0 = COPY [[C]](s32) + ; CHECK: $w0 = COPY [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK: %x0 = COPY [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT half 0xH0000 + ; CHECK: $x0 = COPY [[C1]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[C2]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK: %w0 = COPY [[ANYEXT]](s32) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) %0(s32) = G_FCONSTANT float 1.0 - %w0 = COPY %0 + $w0 = COPY %0 %1(s64) = G_FCONSTANT double 2.0 - %x0 = COPY %1 + $x0 = COPY %1 %2(s16) = G_FCONSTANT half 0.0 %3:_(s32) = G_ANYEXT %2 - %w0 = COPY %3 + $w0 = COPY %3 ... --- @@ -98,8 +98,8 @@ body: | ; CHECK-LABEL: name: test_global ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0) - ; CHECK: %x0 = COPY [[PTRTOINT]](s64) + ; CHECK: $x0 = COPY [[PTRTOINT]](s64) %0(p0) = G_GLOBAL_VALUE @var %1:_(s64) = G_PTRTOINT %0 - %x0 = COPY %1 + $x0 = COPY %1 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-div.mir b/test/CodeGen/AArch64/GlobalISel/legalize-div.mir index 38be3a950e15..a21b83bb5ca4 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-div.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-div.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -21,10 +21,10 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_div - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]] @@ -35,7 +35,7 @@ body: | ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]] ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[ASHR]], [[ASHR1]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C2]] @@ -44,20 +44,20 @@ body: | ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C3]] ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; CHECK: %w0 = COPY [[COPY3]](s32) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $w0 = COPY [[COPY3]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_SDIV %2, %3 %6:_(s32) = G_ANYEXT %4 - %w0 = COPY %6 + $w0 = COPY %6 %5(s8) = 
G_UDIV %2, %3 %7:_(s32) = G_ANYEXT %5 - %w0 = COPY %7 + $w0 = COPY %7 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll index 01f955bc1d10..23797a4878cf 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -15,19 +15,11 @@ declare void @_Unwind_Resume(i8*) ; CHECK: [[LP]].{{[a-z]+}} (landing-pad): ; CHECK: EH_LABEL -; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[STRUCT_PTR:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR]](p0) - -; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY %x1 -; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] -; CHECK: [[STRUCT_SEL:%[0-9]+]]:_(s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0 - -; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[STRUCT_PTR]](s64) -; CHECK: G_STORE [[PTR]](p0), {{%[0-9]+}}(p0) - -; CHECK: [[SEL_TMP:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0 -; CHECK: [[SEL:%[0-9]+]]:_(s32) = COPY [[SEL_TMP]] -; CHECK: G_STORE [[SEL]](s32), {{%[0-9]+}}(p0) +; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1 +; CHECK: [[SEL_PTR_INT:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]](p0) +; CHECK: G_STORE [[PTR]](p0), %0(p0) :: (store 8 into %ir.exn.slot) +; CHECK: G_STORE [[SEL_PTR_INT]](s32), %1(p0) :: (store 4 into %ir.ehselector.slot) define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { %exn.slot = alloca i8* diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir index 1bd25bdae74e..cf4f687408f3 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = 
"e-m:o-i64:64-i128:128-n32:64-S128" @@ -34,110 +34,110 @@ registers: - { id: 18, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_ext - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC]](s32) + ; CHECK: $w0 = COPY [[TRUNC]](s32) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC1]](s32) + ; CHECK: $w0 = COPY [[TRUNC1]](s32) ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC2]](s32) + ; CHECK: $w0 = COPY [[TRUNC2]](s32) ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC3]](s32) + ; CHECK: $w0 = COPY [[TRUNC3]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: %x0 = COPY [[COPY1]](s64) + ; CHECK: $x0 = COPY [[COPY1]](s64) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; CHECK: %x0 = COPY [[AND]](s64) + ; CHECK: $x0 = COPY [[AND]](s64) ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: %x0 = COPY [[COPY3]](s64) + ; CHECK: $x0 = COPY [[COPY3]](s64) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY4]], [[C1]] ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C1]] - ; CHECK: %x0 = COPY [[ASHR]](s64) + ; CHECK: $x0 = COPY [[ASHR]](s64) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC4]], [[C2]] ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C2]] - ; CHECK: %w0 = COPY [[ASHR1]](s32) + ; CHECK: $w0 = COPY [[ASHR1]](s32) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: 
[[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C3]] - ; CHECK: %w0 = COPY [[AND1]](s32) + ; CHECK: $w0 = COPY [[AND1]](s32) ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC6]](s32) + ; CHECK: $w0 = COPY [[TRUNC6]](s32) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC7]], [[C4]] - ; CHECK: %w0 = COPY [[AND2]](s32) + ; CHECK: $w0 = COPY [[AND2]](s32) ; CHECK: [[TRUNC8:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC8]](s32) + ; CHECK: $w0 = COPY [[TRUNC8]](s32) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[TRUNC9:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[TRUNC9]], [[C5]] ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C5]] - ; CHECK: %w0 = COPY [[ASHR2]](s32) + ; CHECK: $w0 = COPY [[ASHR2]](s32) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[TRUNC10:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC3]]4(s32) ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]]1, [[TRUNC3]]2 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[TRUNC3]]3(s32) - ; CHECK: %w0 = COPY [[COPY6]](s32) + ; CHECK: $w0 = COPY [[COPY6]](s32) ; CHECK: [[TRUNC11:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC11]](s32) + ; CHECK: $w0 = COPY [[TRUNC11]](s32) ; CHECK: [[TRUNC12:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: %w0 = COPY [[TRUNC12]](s32) + ; CHECK: $w0 = COPY [[TRUNC12]](s32) ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[TRUNC12]](s32) - ; CHECK: %x0 = COPY [[FPEXT]](s64) - %0(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[FPEXT]](s64) + %0(s64) = COPY $x0 %1(s1) = G_TRUNC %0 %19:_(s32) = G_ANYEXT %1 - %w0 = COPY %19 + $w0 = COPY %19 %2(s8) = G_TRUNC %0 %20:_(s32) = G_ANYEXT %2 - %w0 = COPY %20 + $w0 = COPY %20 %3(s16) = G_TRUNC %0 
%21:_(s32) = G_ANYEXT %3 - %w0 = COPY %21 + $w0 = COPY %21 %4(s32) = G_TRUNC %0 - %w0 = COPY %4 + $w0 = COPY %4 %5(s64) = G_ANYEXT %1 - %x0 = COPY %5 + $x0 = COPY %5 %6(s64) = G_ZEXT %2 - %x0 = COPY %6 + $x0 = COPY %6 %7(s64) = G_ANYEXT %3 - %x0 = COPY %7 + $x0 = COPY %7 %8(s64) = G_SEXT %4 - %x0 = COPY %8 + $x0 = COPY %8 %9(s32) = G_SEXT %1 - %w0 = COPY %9 + $w0 = COPY %9 %10(s32) = G_ZEXT %2 - %w0 = COPY %10 + $w0 = COPY %10 %11(s32) = G_ANYEXT %3 - %w0 = COPY %11 + $w0 = COPY %11 %12(s32) = G_ZEXT %1 - %w0 = COPY %12 + $w0 = COPY %12 %13(s32) = G_ANYEXT %2 - %w0 = COPY %13 + $w0 = COPY %13 %14(s32) = G_SEXT %3 - %w0 = COPY %14 + $w0 = COPY %14 %15(s8) = G_ZEXT %1 %22:_(s32) = G_ANYEXT %15 - %w0 = COPY %22 + $w0 = COPY %22 %16(s16) = G_ANYEXT %2 %23:_(s32) = G_ANYEXT %16 - %w0 = COPY %23 + $w0 = COPY %23 %17(s32) = G_TRUNC %0 - %w0 = COPY %17 + $w0 = COPY %17 %18(s64) = G_FPEXT %17 - %x0 = COPY %18 + $x0 = COPY %18 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir b/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir new file mode 100644 index 000000000000..816484108d25 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir @@ -0,0 +1,24 @@ +# RUN: llc -O0 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_extload(i8* %addr) { + entry: + ret void + } +... + +--- +name: test_extload +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_extload + ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_LOAD [[T0]](p0) :: (load 1 from %ir.addr) + ; CHECK: $w0 = COPY [[T1]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_LOAD %0 :: (load 1 from %ir.addr) + $w0 = COPY %1 +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir index 94a403271797..3444254d070d 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -1,27 +1,27 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_extracts_1 body: | bb.0: - liveins: %w0 + liveins: $w0 ; Low part of extraction takes entirity of the low register entirely, so ; value stored is forwarded directly from first load. ; CHECK-LABEL: name: test_extracts_1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 16) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store 8) ; CHECK: RET_ReallyLR - %0:_(s64) = COPY %x0 - %1:_(s32) = COPY %w1 - %2:_(p0) = COPY %x2 + %0:_(s64) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(p0) = COPY $x2 %3:_(s128) = G_LOAD %2(p0) :: (load 16) %4:_(s64) = G_EXTRACT %3(s128), 0 G_STORE %4(s64), %2(p0) :: (store 8) @@ -32,24 +32,24 @@ body: | name: test_extracts_2 body: | bb.0: - liveins: %w0 + liveins: $w0 ; Low extraction wipes takes whole low register. High extraction is real. 
; CHECK-LABEL: name: test_extracts_2 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 16) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s64), 0 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store 8) ; CHECK: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 4) ; CHECK: RET_ReallyLR - %0:_(s64) = COPY %x0 - %1:_(s32) = COPY %w1 - %2:_(p0) = COPY %x2 + %0:_(s64) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(p0) = COPY $x2 %3:_(s128) = G_LOAD %2(p0) :: (load 16) %4:_(s64) = G_EXTRACT %3(s128), 0 %5:_(s32) = G_EXTRACT %3(s128), 64 @@ -62,22 +62,22 @@ body: | name: test_extracts_3 body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: test_extracts_3 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32 ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) - ; CHECK: %x0 = COPY [[MV]](s64) + ; CHECK: $x0 = COPY [[MV]](s64) ; CHECK: RET_ReallyLR - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 %2:_(s128) = G_MERGE_VALUES %0, %1 %3:_(s64) = G_EXTRACT %2, 32 - %x0 = COPY %3 + $x0 = COPY %3 RET_ReallyLR ... 
@@ -85,19 +85,19 @@ body: | name: test_extracts_4 body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: test_extracts_4 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: RET_ReallyLR - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 %2:_(s128) = G_MERGE_VALUES %0, %1 %3:_(s32) = G_EXTRACT %2, 32 - %w0 = COPY %3 + $w0 = COPY %3 RET_ReallyLR ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir index 1c0c183e2db4..7a688e7eb936 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -23,25 +23,25 @@ registers: - { id: 7, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_icmp - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oge), [[COPY]](s64), [[COPY1]] - ; CHECK: %w0 = COPY [[FCMP]](s32) + ; CHECK: $w0 = COPY [[FCMP]](s32) ; CHECK: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(uno), [[TRUNC]](s32), [[TRUNC1]] - ; CHECK: %w0 = COPY [[FCMP1]](s32) - %0(s64) = COPY %x0 - %1(s64) 
= COPY %x0 + ; CHECK: $w0 = COPY [[FCMP1]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x0 %2(s32) = G_TRUNC %0 %3(s32) = G_TRUNC %1 %4(s32) = G_FCMP floatpred(oge), %0, %1 - %w0 = COPY %4 + $w0 = COPY %4 %5(s32) = G_FCMP floatpred(uno), %2, %3 - %w0 = COPY %5 + $w0 = COPY %5 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir index e7dc314f034f..59de652d7446 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -20,15 +20,15 @@ registers: - { id: 1, class: _ } body: | bb.1: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fneg_f32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -0.000000e+00 ; CHECK: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[C]], [[COPY]] - ; CHECK: %s0 = COPY [[FSUB]](s32) - %0(s32) = COPY %s0 + ; CHECK: $s0 = COPY [[FSUB]](s32) + %0(s32) = COPY $s0 %1(s32) = G_FNEG %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... --- name: test_fneg_f64 @@ -37,13 +37,13 @@ registers: - { id: 1, class: _ } body: | bb.1: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: test_fneg_f64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00 ; CHECK: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[C]], [[COPY]] - ; CHECK: %d0 = COPY [[FSUB]](s64) - %0(s64) = COPY %d0 + ; CHECK: $d0 = COPY [[FSUB]](s64) + %0(s64) = COPY $d0 %1(s64) = G_FNEG %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir index c03c190486f5..d20016ced574 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -29,112 +29,112 @@ name: test_fptosi_s32_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptosi_s32_s32 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[DEF]](s32) - ; CHECK: %w0 = COPY [[FPTOSI]](s32) + ; CHECK: $w0 = COPY [[FPTOSI]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_FPTOSI %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- name: test_fptoui_s32_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptoui_s32_s32 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[DEF]](s32) - ; CHECK: %w0 = COPY [[FPTOUI]](s32) + ; CHECK: $w0 = COPY [[FPTOUI]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_FPTOUI %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- name: test_fptosi_s32_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_fptosi_s32_s64 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[DEF]](s32) - ; CHECK: %w0 = COPY [[FPTOSI]](s32) + ; CHECK: $w0 = COPY [[FPTOSI]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_FPTOSI %0 - %w0 = COPY %1 + $w0 = COPY %1 ... 
--- name: test_fptoui_s32_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_fptoui_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) - ; CHECK: %w0 = COPY [[FPTOUI]](s32) - %0:_(s64) = COPY %x0 + ; CHECK: $w0 = COPY [[FPTOUI]](s32) + %0:_(s64) = COPY $x0 %1:_(s32) = G_FPTOUI %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- name: test_fptosi_s64_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptosi_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI [[COPY]](s32) - ; CHECK: %x0 = COPY [[FPTOSI]](s64) - %0:_(s32) = COPY %w0 + ; CHECK: $x0 = COPY [[FPTOSI]](s64) + %0:_(s32) = COPY $w0 %1:_(s64) = G_FPTOSI %0 - %x0 = COPY %1 + $x0 = COPY %1 ... --- name: test_fptoui_s64_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptoui_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI [[COPY]](s32) - ; CHECK: %x0 = COPY [[FPTOUI]](s64) - %0:_(s32) = COPY %w0 + ; CHECK: $x0 = COPY [[FPTOUI]](s64) + %0:_(s32) = COPY $w0 %1:_(s64) = G_FPTOUI %0 - %x0 = COPY %1 + $x0 = COPY %1 ... --- name: test_fptosi_s64_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_fptosi_s64_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI [[COPY]](s64) - ; CHECK: %x0 = COPY [[FPTOSI]](s64) - %0:_(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[FPTOSI]](s64) + %0:_(s64) = COPY $x0 %1:_(s64) = G_FPTOSI %0 - %x0 = COPY %1 + $x0 = COPY %1 ... 
--- name: test_fptoui_s64_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_fptoui_s64_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI [[COPY]](s64) - ; CHECK: %x0 = COPY [[FPTOUI]](s64) - %0:_(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[FPTOUI]](s64) + %0:_(s64) = COPY $x0 %1:_(s64) = G_FPTOUI %0 - %x0 = COPY %1 + $x0 = COPY %1 ... @@ -143,93 +143,94 @@ body: | name: test_fptosi_s1_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptosi_s1_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32) - ; CHECK: %x0 = COPY [[TRUNC]](s1) - %0:_(s32) = COPY %w0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FPTOSI]](s32) + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0:_(s32) = COPY $w0 %1:_(s1) = G_FPTOSI %0 - %x0 = COPY %1 + %2:_(s64) = G_ANYEXT %1 + $x0 = COPY %2 ... --- name: test_fptoui_s1_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptoui_s1_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[FPTOUI]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $w0 %1:_(s1) = G_FPTOUI %0 %2:_(s32) = G_ANYEXT %1 - %w0 = COPY %2 + $w0 = COPY %2 ... 
--- name: test_fptosi_s8_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_fptosi_s8_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[FPTOSI]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) - %0:_(s64) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(s64) = COPY $x0 %1:_(s8) = G_FPTOSI %0 %2:_(s32) = G_ANYEXT %1 - %w0 = COPY %2 + $w0 = COPY %2 ... --- name: test_fptoui_s8_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_fptoui_s8_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[FPTOUI]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) - %0:_(s64) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(s64) = COPY $x0 %1:_(s8) = G_FPTOUI %0 %2:_(s32) = G_ANYEXT %1 - %w0 = COPY %2 + $w0 = COPY %2 ... --- name: test_fptosi_s16_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptosi_s16_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[FPTOSI]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $w0 %1:_(s16) = G_FPTOSI %0 %2:_(s32) = G_ANYEXT %1 - %w0 = COPY %2 + $w0 = COPY %2 ... 
--- name: test_fptoui_s16_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_fptoui_s16_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[FPTOUI]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) - %0:_(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $w0 %1:_(s16) = G_FPTOUI %0 %2:_(s32) = G_ANYEXT %1 - %w0 = COPY %2 + $w0 = COPY %2 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir b/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir index 67310d10336e..f7d77c72a384 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -19,20 +19,20 @@ registers: - { id: 3, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_gep_small - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[C]] ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]] ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[ASHR]](s64) - ; CHECK: %x0 = COPY [[GEP]](p0) - %0(p0) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[GEP]](p0) + %0(p0) = COPY $x0 + %1(s64) = COPY $x1 %2(s8) = G_TRUNC %1 %3(p0) = G_GEP %0, %2(s8) - %x0 = COPY %3 + $x0 = COPY %3 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir index b0de3fc8092a..899eb67fc6ba 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -14,13 +14,13 @@ registers: - { id: 0, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_copy - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: %x0 = COPY [[COPY]](s64) - %0(s64) = COPY %x0 - %x0 = COPY %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]](s64) + %0(s64) = COPY $x0 + $x0 = COPY %0 ... --- diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir index d430eb91ea52..8afe9c86b20a 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -16,7 +16,7 @@ name: test_inserts_1 body: | bb.0: - liveins: %w0 + liveins: $w0 ; Low part of insertion wipes out the old register entirely, so %0 gets ; forwarded to the G_STORE. 
Hi part is unchanged so (split) G_LOAD gets @@ -26,9 +26,9 @@ body: | ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD ; CHECK: G_STORE %0(s64) ; CHECK: G_STORE [[HI]] - %0:_(s64) = COPY %x0 - %1:_(s32) = COPY %w1 - %2:_(p0) = COPY %x2 + %0:_(s64) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(p0) = COPY $x2 %3:_(s128) = G_LOAD %2(p0) :: (load 16) %4:_(s128) = G_INSERT %3(s128), %0(s64), 0 G_STORE %4(s128), %2(p0) :: (store 16) @@ -39,7 +39,7 @@ body: | name: test_inserts_2 body: | bb.0: - liveins: %w0 + liveins: $w0 ; Low insertion wipes out the old register entirely, so %0 gets forwarded ; to the G_STORE again. Second insertion is real. @@ -49,9 +49,9 @@ body: | ; CHECK: [[NEWHI:%[0-9]+]]:_(s64) = G_INSERT [[HI]], %1(s32), 0 ; CHECK: G_STORE %0(s64) ; CHECK: G_STORE [[NEWHI]] - %0:_(s64) = COPY %x0 - %1:_(s32) = COPY %w1 - %2:_(p0) = COPY %x2 + %0:_(s64) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(p0) = COPY $x2 %3:_(s128) = G_LOAD %2(p0) :: (load 16) %4:_(s128) = G_INSERT %3(s128), %0(s64), 0 %5:_(s128) = G_INSERT %4(s128), %1(s32), 64 @@ -63,7 +63,7 @@ body: | name: test_inserts_3 body: | bb.0: - liveins: %w0 + liveins: $w0 ; I'm not entirely convinced inserting a p0 into an s64 is valid, but it's ; certainly better than the alternative of directly forwarding the value @@ -74,9 +74,9 @@ body: | ; CHECK: [[NEWLO:%[0-9]+]]:_(s64) = G_PTRTOINT %0(p0) ; CHECK: G_STORE [[NEWLO]](s64) ; CHECK: G_STORE [[HI]] - %0:_(p0) = COPY %x0 - %1:_(s32) = COPY %w1 - %2:_(p0) = COPY %x2 + %0:_(p0) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(p0) = COPY $x2 %3:_(s128) = G_LOAD %2(p0) :: (load 16) %4:_(s128) = G_INSERT %3(s128), %0(p0), 0 G_STORE %4(s128), %2(p0) :: (store 16) @@ -87,18 +87,18 @@ body: | name: test_inserts_4 body: | bb.0: - liveins: %w0 + liveins: $w0 ; A narrow insert gets surrounded by a G_ANYEXT/G_TRUNC pair. 
; CHECK-LABEL: name: test_inserts_4 ; CHECK: [[VALEXT:%[0-9]+]]:_(s32) = COPY %2(s32) ; CHECK: [[VAL:%[0-9]+]]:_(s32) = G_INSERT [[VALEXT]], %1(s1), 0 ; CHECK: %5:_(s8) = G_TRUNC [[VAL]](s32) - %4:_(s32) = COPY %w0 + %4:_(s32) = COPY $w0 %0:_(s1) = G_TRUNC %4 - %5:_(s32) = COPY %w1 + %5:_(s32) = COPY $w1 %1:_(s8) = G_TRUNC %5 - %2:_(p0) = COPY %x2 + %2:_(p0) = COPY $x2 %3:_(s8) = G_INSERT %1(s8), %0(s1), 0 G_STORE %3(s8), %2(p0) :: (store 1) RET_ReallyLR @@ -108,7 +108,7 @@ body: | name: test_inserts_5 body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: test_inserts_5 @@ -117,13 +117,13 @@ body: | ; CHECK: [[INS_HI:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 32 ; CHECK: [[VAL_HI:%[0-9]+]]:_(s64) = G_INSERT %1, [[INS_HI]](s32), 0 ; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), [[VAL_HI]](s64) - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 - %2:_(s64) = COPY %x2 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 %3:_(s128) = G_MERGE_VALUES %0, %1 %4:_(s128) = G_INSERT %3, %2, 32 %5:_(s64) = G_TRUNC %4 - %x0 = COPY %5 + $x0 = COPY %5 RET_ReallyLR ... @@ -131,19 +131,19 @@ body: | name: test_inserts_6 body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: test_inserts_6 ; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, %2(s32), 32 ; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), %1(s64) - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 - %2:_(s32) = COPY %w2 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s32) = COPY $w2 %3:_(s128) = G_MERGE_VALUES %0, %1 %4:_(s128) = G_INSERT %3, %2, 32 %5:_(s64) = G_TRUNC %4 - %x0 = COPY %5 + $x0 = COPY %5 RET_ReallyLR ... 
@@ -151,19 +151,19 @@ body: | name: test_inserts_nonpow2 body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: test_inserts_nonpow2 - ; CHECK: [[C:%[0-9]+]]:_(s64) = COPY %x3 - ; CHECK: %x0 = COPY [[C]] - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 - %2:_(s64) = COPY %x2 - %3:_(s64) = COPY %x3 + ; CHECK: [[C:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: $x0 = COPY [[C]] + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = COPY $x3 %4:_(s192) = G_MERGE_VALUES %0, %1, %2 %5:_(s192) = G_INSERT %4, %3, 0 %6:_(s64), %7:_(s64), %8:_(s64) = G_UNMERGE_VALUES %5 - %x0 = COPY %6 + $x0 = COPY %6 RET_ReallyLR ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-inttoptr-xfail-1.mir b/test/CodeGen/AArch64/GlobalISel/legalize-inttoptr-xfail-1.mir new file mode 100644 index 000000000000..32c4ed82bb37 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-inttoptr-xfail-1.mir @@ -0,0 +1,38 @@ +# RUN: not llc -mtriple=aarch64-- -run-pass=legalizer %s -o - 2>&1 | FileCheck %s +# REQUIRES: asserts + +# This is to demonstrate what kind of bugs we're missing w/o some kind +# of validation for LegalizerInfo: G_INTTOPTR could only be legal / +# could be legalized if its destination operand has a pointer type and +# its source - a scalar type. This is reversed in this test and the +# legalizer is expected to fail on it with an appropriate error +# message. Prior to LegalizerInfo::verify AArch64 legalizer had a +# subtle bug in its definition that caused it to accept the following +# MIR as legal. Namely, it checked that type index 0 is either s64 or +# p0 (in that order) and implicitly declared any type for type index 1 +# as legal. As LegalizerInfo::verify asserts on such a definition due +# to type index 1 not being covered it forces to review the definition +# and fix the mistake: check that type index 0 is p0 and type index 1 +# is s64 (in that order). 
+ +# CHECK: LLVM ERROR: unable to legalize instruction: +# CHECK-SAME: %{{[0-9]+}}:_(s64) = G_INTTOPTR %{{[0-9]+}}:_(p0) +# CHECK-SAME: (in function: broken) + +--- +name: broken +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1: + liveins: $x0 + + %0:_(p0) = COPY $x0 + %1:_(s64) = G_INTTOPTR %0(p0) + $x0 = COPY %1(s64) + RET_ReallyLR implicit $x0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-inttoptr-xfail-2.mir b/test/CodeGen/AArch64/GlobalISel/legalize-inttoptr-xfail-2.mir new file mode 100644 index 000000000000..3584bfd12e66 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-inttoptr-xfail-2.mir @@ -0,0 +1,42 @@ +# RUN: not llc -mtriple=aarch64-- -run-pass=legalizer %s -o - 2>&1 | FileCheck %s +# REQUIRES: asserts + +# This is to demonstrate what kind of bugs we're missing w/o some kind +# of validation for LegalizerInfo: G_INTTOPTR could only be legal / +# could be legalized if its destination operand has a pointer type and +# its source - a scalar type of an appropriate size. This test meets +# the requirements for type index 0 (the pointer) and LLT-size +# requirements for type index 1 (64 bits for AArch64), but has a +# non-scalar (vector) type for type index 1. The Legalizer is expected +# to fail on it with an appropriate error message. Prior to +# LegalizerInfo::verify AArch64 legalizer had a subtle bug in its +# definition that caused it to accept the following MIR as legal. +# Namely, it checked that type index 0 is either s64 or p0 and +# implicitly declared any type for type index 1 as legal (as soon as +# its size is 64 bits). As LegalizerInfo::verify asserts on such a +# definition due to type index 1 not being covered by a specific +# action (not just `unsupportedIf`) it forces to review the definition +# and fix the mistake: check that type index 0 is p0 and type index 1 +# is s64. 
+ +# CHECK: LLVM ERROR: unable to legalize instruction: +# CHECK-SAME: %{{[0-9]+}}:_(p0) = G_INTTOPTR %{{[0-9]+}}:_(<4 x s16>) +# CHECK-SAME: (in function: broken) + +--- +name: broken +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1: + liveins: $d0 + + %0:_(<4 x s16>) = COPY $d0 + %1:_(p0) = G_INTTOPTR %0(<4 x s16>) + $x0 = COPY %1(p0) + RET_ReallyLR implicit $x0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir index af9fb5d70d61..9539c54c5b09 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -29,104 +29,104 @@ name: test_sitofp_s32_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sitofp_s32_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s32) = G_SITOFP %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- name: test_uitofp_s32_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_uitofp_s32_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s32) = G_UITOFP %0 - %w0 = COPY %1 + $w0 = COPY %1 ... 
--- name: test_sitofp_s32_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_sitofp_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s64) - %0:_(s64) = COPY %x0 + %0:_(s64) = COPY $x0 %1:_(s32) = G_SITOFP %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- name: test_uitofp_s32_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_uitofp_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s64) - %0:_(s64) = COPY %x0 + %0:_(s64) = COPY $x0 %1:_(s32) = G_UITOFP %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- name: test_sitofp_s64_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sitofp_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s64) = G_SITOFP %0 - %w0 = COPY %1 + $x0 = COPY %1 ... --- name: test_uitofp_s64_s32 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_uitofp_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s64) = G_UITOFP %0 - %x0 = COPY %1 + $x0 = COPY %1 ... --- name: test_sitofp_s64_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_sitofp_s64_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s64) - %0:_(s64) = COPY %x0 + %0:_(s64) = COPY $x0 %1:_(s64) = G_SITOFP %0 - %x0 = COPY %1 + $x0 = COPY %1 ... 
--- name: test_uitofp_s64_s64 body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_uitofp_s64_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s64) - %0:_(s64) = COPY %x0 + %0:_(s64) = COPY $x0 %1:_(s64) = G_UITOFP %0 - %x0 = COPY %1 + $x0 = COPY %1 ... @@ -134,103 +134,103 @@ body: | name: test_sitofp_s32_s1 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sitofp_s32_s1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]] ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s1) = G_TRUNC %0 %2:_(s32) = G_SITOFP %1 - %w0 = COPY %2 + $w0 = COPY %2 ... --- name: test_uitofp_s32_s1 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_uitofp_s32_s1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s1) = G_TRUNC %0 %2:_(s32) = G_UITOFP %1 - %w0 = COPY %2 + $w0 = COPY %2 ... 
--- name: test_sitofp_s64_s8 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sitofp_s64_s8 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]] ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s8) = G_TRUNC %0 %2:_(s64) = G_SITOFP %1 - %x0 = COPY %2 + $x0 = COPY %2 ... --- name: test_uitofp_s64_s8 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_uitofp_s64_s8 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s8) = G_TRUNC %0 %2:_(s64) = G_UITOFP %1 - %x0 = COPY %2 + $x0 = COPY %2 ... --- name: test_sitofp_s32_s16 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sitofp_s32_s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]] ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s16) = G_TRUNC %0 %2:_(s32) = G_SITOFP %1 - %w0 = COPY %2 + $w0 = COPY %2 ... 
--- name: test_uitofp_s32_s16 body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_uitofp_s32_s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) - %0:_(s32) = COPY %w0 + %0:_(s32) = COPY $w0 %1:_(s16) = G_TRUNC %0 %2:_(s32) = G_UITOFP %1 - %w0 = COPY %2 + $w0 = COPY %2 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index 6e775da9e802..9a5630371599 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -27,49 +27,49 @@ registers: - { id: 8, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_load - %0(p0) = COPY %x0 + %0(p0) = COPY $x0 %1(s1) = G_LOAD %0 :: (load 1 from %ir.addr) %9:_(s32) = G_ANYEXT %1 - %w0 = COPY %9 + $w0 = COPY %9 ; CHECK: %2:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) %2(s8) = G_LOAD %0 :: (load 1 from %ir.addr) %10:_(s32) = G_ANYEXT %2 - %w0 = COPY %10 + $w0 = COPY %10 ; CHECK: %3:_(s16) = G_LOAD %0(p0) :: (load 2 from %ir.addr) %3(s16) = G_LOAD %0 :: (load 2 from %ir.addr) %11:_(s32) = G_ANYEXT %3 - %w0 = COPY %11 + $w0 = COPY %11 ; CHECK: %4:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.addr) %4(s32) = G_LOAD %0 :: (load 4 from %ir.addr) - %w0 = COPY %4 + $w0 = COPY %4 ; CHECK: %5:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %5(s64) = G_LOAD %0 :: (load 8 from %ir.addr) - %x0 = COPY %5 + $x0 = COPY %5 %6(p0) = G_LOAD %0(p0) :: 
(load 8 from %ir.addr) %12:_(s64) = G_PTRTOINT %6 - %x0 = COPY %12 + $x0 = COPY %12 ; CHECK: %7:_(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %13:_(s64) = G_BITCAST %7 - %x0 = COPY %13 + $x0 = COPY %13 - ; CHECK: [[LOAD0:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 16 from %ir.addr) + ; CHECK: [[LOAD0:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr, align 16) ; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[OFFSET1]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 16 from %ir.addr) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) ; CHECK: %8:_(s128) = G_MERGE_VALUES [[LOAD0]](s64), [[LOAD1]](s64) %8(s128) = G_LOAD %0(p0) :: (load 16 from %ir.addr) %14:_(s64) = G_TRUNC %8 - %x0 = COPY %14 + $x0 = COPY %14 ... --- @@ -85,11 +85,11 @@ registers: - { id: 7, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_store - %0(p0) = COPY %x0 - %1(s32) = COPY %w1 + %0(p0) = COPY $x0 + %1(s32) = COPY $w1 ; CHECK: [[C1:%.*]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[B:%.*]]:_(s32) = COPY %1(s32) @@ -120,10 +120,10 @@ body: | ; CHECK: G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr) G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr) - ; CHECK: G_STORE %5(s64), %0(p0) :: (store 16 into %ir.addr) + ; CHECK: G_STORE %5(s64), %0(p0) :: (store 8 into %ir.addr, align 16) ; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[OFFSET1]](s64) - ; CHECK: G_STORE %6(s64), [[GEP1]](p0) :: (store 16 into %ir.addr) + ; CHECK: G_STORE %6(s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8) %6(s64) = G_PTRTOINT %0(p0) %7(s128) = G_MERGE_VALUES %5, %6 G_STORE %7, %0 :: (store 16 into %ir.addr) diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-load-vector.mir 
b/test/CodeGen/AArch64/GlobalISel/legalize-load-vector.mir new file mode 100644 index 000000000000..5559d37bc221 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-load-vector.mir @@ -0,0 +1,61 @@ +# RUN: not llc %s -o - -run-pass=legalizer 2>&1 | FileCheck %s + +# Check we don't infinitely loop on (currently) illegal non-extending loads +# CHECK: LLVM ERROR: unable to legalize instruction + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--linux-gnu" + + ; Function Attrs: noinline nounwind optnone + define dso_local float @simulated_vgetq_lane_f16(<8 x half> %vec, i32 %lane) #0 { + entry: + %__ret.i = alloca <4 x half>, align 8 + ret float 0.0 + } + + attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +... 
+--- +name: simulated_vgetq_lane_f16 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: false + hasCalls: true + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: + - { id: 0, name: __ret.i, type: default, offset: 0, size: 8, alignment: 8, + stack-id: 0, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +constants: +body: | + bb.1.entry: + liveins: $x0 + + %0:_(p0) = COPY $x0 + %1:_(<4 x s16>) = G_LOAD %0:_(p0) :: (load 8 from %ir.__ret.i) + $x1 = COPY %1(<4 x s16>) + +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir index e6171380344e..69612eb0f179 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -26,5 +26,5 @@ body: | %2(s8) = G_MERGE_VALUES %1(s4), %1(s4) %3(s8) = COPY %2(s8) %4(s64) = G_ANYEXT %3(s8) - %x0 = COPY %4(s64) + $x0 = COPY %4(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index c94d73920ca3..e6e6ab7825f5 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -8,7 +8,12 @@ entry: ret void } - define void @test_mul_overflow() { ret void } + define void @test_smul_overflow() { + ret void + } + define void @test_umul_overflow() { + ret void + } ... --- @@ -22,47 +27,74 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_mul_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[TRUNC]], [[TRUNC1]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[MUL]](s32) - ; CHECK: %x0 = COPY [[ANYEXT]](s64) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_MUL %2, %3 %5(s64) = G_ANYEXT %4 - %x0 = COPY %5 + $x0 = COPY %5 ... 
--- -name: test_mul_overflow +name: test_smul_overflow body: | bb.0: - liveins: %x0, %x1, %w2, %w3 + liveins: $x0, $x1, $w2, $w3 - ; CHECK-LABEL: name: test_mul_overflow - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK-LABEL: name: test_smul_overflow + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[C]] - ; CHECK: %x0 = COPY [[MUL]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]] + ; CHECK: $x0 = COPY [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 + ; CHECK: $w0 = COPY [[COPY2]](s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 %2:_(s64), %3:_(s1) = G_SMULO %0, %1 - %x0 = COPY %2 + $x0 = COPY %2 + %4:_(s32) = G_ANYEXT %3 + $w0 = COPY %4 + +... 
+ + +--- +name: test_umul_overflow +body: | + bb.0: + liveins: $x0, $x1, $w2, $w3 + + ; CHECK-LABEL: name: test_umul_overflow + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] + ; CHECK: $x0 = COPY [[MUL]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64), %3:_(s1) = G_UMULO %0, %1 + $x0 = COPY %2 %4:_(s32) = G_ANYEXT %3 - %w0 = COPY %4 + $w0 = COPY %4 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir b/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir index 168e1df02775..f0a45bf3cf0c 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -19,16 +19,15 @@ registers: - { id: 5, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_legalize_merge_v3s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[MV:%[0-9]+]]:_(<3 x s64>) = G_MERGE_VALUES [[COPY]](s64), [[COPY]](s64), [[COPY]](s64) - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s64>) = COPY [[MV]](<3 x s64>) - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<3 x s64>) - ; CHECK: %x0 = COPY [[UV]](s64) - %0(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[COPY]](s64) + ; 
CHECK: $noreg = PATCHABLE_RET [[MV]](<3 x s64>) + %0(s64) = COPY $x0 %1(<3 x s64>) = G_MERGE_VALUES %0(s64), %0(s64), %0(s64) - %2(<3 x s64>) = COPY %1(<3 x s64>) - %3(s64), %4(s64), %5(s64) = G_UNMERGE_VALUES %2(<3 x s64>) - %x0 = COPY %3(s64) + %2(s64), %3(s64), %4(s64) = G_UNMERGE_VALUES %1(<3 x s64>) + $x0 = COPY %3(s64) + $noreg = PATCHABLE_RET %1(<3 x s64>) ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-or.mir b/test/CodeGen/AArch64/GlobalISel/legalize-or.mir index 9dbade2c193a..4713404e58e3 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-or.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-or.mir @@ -1,55 +1,56 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=legalizer %s -o - | FileCheck %s --- name: test_scalar_or_small body: | bb.0: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_or_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK: %x0 = COPY [[TRUNC2]](s8) - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 %2:_(s8) = G_TRUNC %0 %3:_(s8) = G_TRUNC %1 %4:_(s8) = G_OR %2, %3 - %x0 = COPY %4 + %5:_(s64) = G_ANYEXT %4 + $x0 = COPY %5 ... 
--- name: test_big_scalar_power_of_2 body: | bb.0: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; We have a temporary G_MERGE_VALUES in the legalizer that gets ; cleaned up with the G_UNMERGE_VALUES, so we end up directly ; copying the results of the G_OR ops. ; CHECK-LABEL: name: test_big_scalar_power_of_2 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %x2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY %x3 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY2]] ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[COPY3]] - ; CHECK: %x0 = COPY [[OR]](s64) - ; CHECK: %x1 = COPY [[OR1]](s64) - ; CHECK: RET_ReallyLR implicit %x0, implicit %x1 - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 - %2:_(s64) = COPY %x2 - %3:_(s64) = COPY %x3 + ; CHECK: $x0 = COPY [[OR]](s64) + ; CHECK: $x1 = COPY [[OR1]](s64) + ; CHECK: RET_ReallyLR implicit $x0, implicit $x1 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = COPY $x3 %4:_(s128) = G_MERGE_VALUES %0, %1 %5:_(s128) = G_MERGE_VALUES %2, %3 %6:_(s128) = G_OR %4, %5 %7:_(s64), %8:_(s64) = G_UNMERGE_VALUES %6 - %x0 = COPY %7 - %x1 = COPY %8 - RET_ReallyLR implicit %x0, implicit %x1 + $x0 = COPY %7 + $x1 = COPY %8 + RET_ReallyLR implicit $x0, implicit $x1 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index 807c2320058a..7c4bbfcc63f8 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -1,5 +1,4 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --- | ; ModuleID = '/tmp/test.ll' source_filename = "/tmp/test.ll" @@ -66,8 +65,8 @@ body: | ; CHECK-LABEL: name: legalize_phi ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: %w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -89,17 +88,17 @@ body: | ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: %w0 = COPY [[AND]](s32) - ; CHECK: RET_ReallyLR implicit %w0 + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 bb.0: ; Test that we insert legalization artifacts(Truncs here) into the correct BBs ; while legalizing the G_PHI to s16. successors: %bb.1(0x40000000), %bb.2(0x40000000) - liveins: %w0 + liveins: $w0 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 0 %3(s32) = G_CONSTANT i32 1 %6(s32) = G_CONSTANT i32 2 @@ -123,8 +122,8 @@ body: | bb.3: %9(s1) = G_PHI %5(s1), %bb.1, %8(s1), %bb.2 %10(s32) = G_ZEXT %9(s1) - %w0 = COPY %10(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %10(s32) + RET_ReallyLR implicit $w0 ... 
--- @@ -147,10 +146,10 @@ body: | ; CHECK-LABEL: name: legalize_phi_ptr ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: %w2, %x0, %x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY %x1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %w2 + ; CHECK: liveins: $w2, $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 ; CHECK: G_BR %bb.2 @@ -158,16 +157,16 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[COPY1]](p0), %bb.1 - ; CHECK: %x0 = COPY [[PHI]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[PHI]](p0) + ; CHECK: RET_ReallyLR implicit $x0 bb.1: successors: %bb.2, %bb.3 - liveins: %w2, %x0, %x1 + liveins: $w2, $x0, $x1 - %0(p0) = COPY %x0 - %1(p0) = COPY %x1 - %4(s32) = COPY %w2 + %0(p0) = COPY $x0 + %1(p0) = COPY $x1 + %4(s32) = COPY $w2 %2(s1) = G_TRUNC %4(s32) G_BRCOND %2(s1), %bb.2 G_BR %bb.3 @@ -177,8 +176,8 @@ body: | bb.3: %3(p0) = G_PHI %0(p0), %bb.1, %1(p0), %bb.2 - %x0 = COPY %3(p0) - RET_ReallyLR implicit %x0 + $x0 = COPY %3(p0) + RET_ReallyLR implicit $x0 ... 
--- @@ -206,8 +205,8 @@ body: | ; CHECK-LABEL: name: legalize_phi_empty ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: %w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -229,17 +228,17 @@ body: | ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: %w0 = COPY [[AND]](s32) - ; CHECK: RET_ReallyLR implicit %w0 + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) - liveins: %w0 + liveins: $w0 ; Test that we properly legalize a phi with a predecessor that's empty - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 0 %3(s32) = G_CONSTANT i32 3 %6(s32) = G_CONSTANT i32 1 @@ -263,8 +262,8 @@ body: | bb.3: %9(s1) = G_PHI %8(s1), %bb.1, %5(s1), %bb.2 %10(s32) = G_ZEXT %9(s1) - %w0 = COPY %10(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %10(s32) + RET_ReallyLR implicit $w0 ... 
--- @@ -289,14 +288,14 @@ body: | ; CHECK-LABEL: name: legalize_phi_loop ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: %w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, %14(s16), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC3:%[0-9]+]](s16), %bb.1 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT]], [[COPY1]] @@ -306,20 +305,20 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]] ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) + ; CHECK: [[TRUNC3]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK: G_BRCOND [[TRUNC2]](s1), %bb.1 ; CHECK: bb.2: ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CHECK: %w0 = COPY [[AND1]](s32) - ; CHECK: RET_ReallyLR implicit %w0 + ; CHECK: $w0 = COPY [[AND1]](s32) + ; CHECK: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x80000000) - liveins: %w0 + liveins: $w0 ; Test that we properly legalize a phi that uses a value from the same BB - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %2(s8) = G_CONSTANT i8 1 %7(s8) = G_CONSTANT i8 0 @@ -334,8 +333,8 @@ body: | bb.3: %6(s32) = G_ZEXT %3(s8) - %w0 = COPY %6(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 ... 
--- @@ -357,31 +356,31 @@ body: | ; CHECK-LABEL: name: legalize_phi_cycle ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: %w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, %8(s16), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[COPY1:%[0-9]+]](s16), %bb.1 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[PHI]](s16) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]] ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[PHI]](s16) + ; CHECK: [[COPY1]]:_(s16) = COPY [[PHI]](s16) ; CHECK: G_BRCOND [[TRUNC2]](s1), %bb.1 ; CHECK: bb.2: - ; CHECK: %w0 = COPY [[AND]](s32) - ; CHECK: RET_ReallyLR implicit %w0 + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x80000000) - liveins: %w0 + liveins: $w0 ; Test that we properly legalize a phi that uses itself - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %4(s8) = G_CONSTANT i8 0 bb.1: @@ -393,8 +392,8 @@ body: | G_BRCOND %3(s1), %bb.1 bb.3: - %w0 = COPY %2(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 ... 
--- @@ -426,8 +425,8 @@ body: | ; CHECK-LABEL: name: legalize_phi_same_bb ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: %w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -456,19 +455,19 @@ body: | ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[C]]1, [[C]]2 - ; CHECK: %w0 = COPY [[C]]3(s32) - ; CHECK: RET_ReallyLR implicit %w0 + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; CHECK: $w0 = COPY [[ADD2]](s32) + ; CHECK: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) - liveins: %w0 + liveins: $w0 ; Make sure that we correctly insert the new legalized G_PHI at the ; correct location (ie make sure G_PHIs are the first insts in the BB). - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 0 %3(s32) = G_CONSTANT i32 3 %6(s32) = G_CONSTANT i32 1 @@ -496,8 +495,8 @@ body: | %11(s32) = G_ZEXT %9(s8) %12(s32) = G_ZEXT %10(s8) %13(s32) = G_ADD %11, %12 - %w0 = COPY %13(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %13(s32) + RET_ReallyLR implicit $w0 ... 
--- @@ -530,8 +529,9 @@ body: | ; CHECK-LABEL: name: legalize_phi_diff_bb ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: %w0, %w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -546,7 +546,7 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC2]](s16), %bb.0, [[C]]2(s16), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC2]](s16), %bb.0, [[TRUNC5:%[0-9]+]](s16), %bb.1 ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[PHI]](s16) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) @@ -554,27 +554,27 @@ body: | ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[ADD1]](s32), [[C3]] ; CHECK: [[TRUNC4:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[PHI]](s16) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[PHI]](s16) + ; CHECK: [[TRUNC5]]:_(s16) = G_TRUNC [[C4]](s32) ; CHECK: G_BRCOND [[TRUNC4]](s1), %bb.2 ; CHECK: G_BR %bb.1 ; CHECK: bb.2: - ; CHECK: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[COPY1]](s16), %bb.1, [[TRUNC1]](s16), %bb.0 + ; CHECK: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[COPY2]](s16), %bb.1, [[TRUNC1]](s16), %bb.0 ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[C]]8, [[C]]7 - ; CHECK: %w0 = COPY [[AND1]](s32) - ; CHECK: RET_ReallyLR implicit %w0 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C6]] + ; CHECK: $w0 = COPY 
[[AND1]](s32) + ; CHECK: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x40000000), %bb.3(0x40000000) - liveins: %w0, %w1 + liveins: $w0, $w1 ; Make sure that we correctly legalize PHIs sharing common defs ; in different BBs. - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_CONSTANT i32 0 %4(s32) = G_CONSTANT i32 3 %9(s32) = G_CONSTANT i32 1 @@ -599,7 +599,7 @@ body: | bb.3: %13(s8) = G_PHI %7(s8), %bb.1, %6(s8), %bb.0 %14(s32) = G_ZEXT %13(s8) - %w0 = COPY %14(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %14(s32) + RET_ReallyLR implicit $w0 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir b/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir index be3485919973..0b328b6345e0 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -13,28 +13,28 @@ name: test_pow body: | bb.0.entry: - liveins: %d0, %d1, %s2, %s3 + liveins: $d0, $d1, $s2, $s3 ; CHECK-LABEL: name: test_pow ; CHECK: hasCalls: true - %0:_(s64) = COPY %d0 - %1:_(s64) = COPY %d1 - %2:_(s32) = COPY %s2 - %3:_(s32) = COPY %s3 + %0:_(s64) = COPY $d0 + %1:_(s64) = COPY $d1 + %2:_(s32) = COPY $s2 + %3:_(s32) = COPY $s3 - ; CHECK: %d0 = COPY %0 - ; CHECK: %d1 = COPY %1 - ; CHECK: BL $pow, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 - ; CHECK: %4:_(s64) = COPY %d0 + ; CHECK: $d0 = COPY %0 + ; CHECK: $d1 = COPY %1 + ; CHECK: BL &pow, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $d1, implicit-def $d0 + ; CHECK: %4:_(s64) = COPY $d0 %4:_(s64) = G_FPOW %0, %1 - %x0 = COPY %4 + $x0 = COPY %4 - ; CHECK: %s0 = COPY %2 - ; CHECK: %s1 = COPY %3 - ; CHECK: BL $powf, csr_aarch64_aapcs, implicit-def %lr, implicit 
%sp, implicit %s0, implicit %s1, implicit-def %s0 - ; CHECK: %5:_(s32) = COPY %s0 + ; CHECK: $s0 = COPY %2 + ; CHECK: $s1 = COPY %3 + ; CHECK: BL &powf, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit $s1, implicit-def $s0 + ; CHECK: %5:_(s32) = COPY $s0 %5:_(s32) = G_FPOW %2, %3 - %w0 = COPY %5 + $w0 = COPY %5 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-property.mir b/test/CodeGen/AArch64/GlobalISel/legalize-property.mir index 1381484443e6..1769e20d5bc3 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-property.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-property.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir index 7303a9c26fc9..35e71d615562 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -30,19 +30,19 @@ registers: - { id: 2, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_urem_64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]] ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[UDIV]], [[COPY1]] ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[MUL]] - ; CHECK: %x0 = COPY [[SUB]](s64) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; 
CHECK: $x0 = COPY [[SUB]](s64) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_UREM %0, %1 - %x0 = COPY %2 + $x0 = COPY %2 ... @@ -56,23 +56,23 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_srem_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[TRUNC]], [[TRUNC1]] ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SDIV]], [[TRUNC1]] ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[MUL]] - ; CHECK: %w0 = COPY [[SUB]](s32) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $w0 = COPY [[SUB]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %3(s32) = G_TRUNC %0 %4(s32) = G_TRUNC %1 %5(s32) = G_SREM %3, %4 - %w0 = COPY %5 + $w0 = COPY %5 ... 
--- @@ -85,12 +85,12 @@ registers: - { id: 8, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_srem_8 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]] @@ -107,14 +107,14 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[MUL]](s32) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC3]], [[COPY3]] ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: %w0 = COPY [[COPY4]](s32) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $w0 = COPY [[COPY4]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %6(s8) = G_TRUNC %0 %7(s8) = G_TRUNC %1 %8(s8) = G_SREM %6, %7 %9:_(s32) = G_ANYEXT %8 - %w0 = COPY %9 + $w0 = COPY %9 ... 
--- name: test_frem @@ -127,33 +127,33 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_frem - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def %sp, implicit %sp - ; CHECK: %d0 = COPY [[COPY]](s64) - ; CHECK: %d1 = COPY [[COPY1]](s64) - ; CHECK: BL $fmod, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %d0 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def %sp, implicit %sp - ; CHECK: %x0 = COPY [[COPY2]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $d0 = COPY [[COPY]](s64) + ; CHECK: $d1 = COPY [[COPY1]](s64) + ; CHECK: BL &fmod, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $d1, implicit-def $d0 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY2]](s64) ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def %sp, implicit %sp - ; CHECK: %s0 = COPY [[TRUNC]](s32) - ; CHECK: %s1 = COPY [[TRUNC1]](s32) - ; CHECK: BL $fmodf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %s0 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def %sp, implicit %sp - ; CHECK: %w0 = COPY [[COPY3]](s32) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $s0 = COPY [[TRUNC]](s32) + ; CHECK: $s1 = COPY [[TRUNC1]](s32) + ; CHECK: BL &fmodf, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit $s1, implicit-def $s0 
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $w0 = COPY [[COPY3]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_FREM %0, %1 - %x0 = COPY %2 + $x0 = COPY %2 %3(s32) = G_TRUNC %0 %4(s32) = G_TRUNC %1 %5(s32) = G_FREM %3, %4 - %w0 = COPY %5 + $w0 = COPY %5 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir b/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir new file mode 100644 index 000000000000..cfd1550303fd --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir @@ -0,0 +1,24 @@ +# RUN: llc -O0 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_zextload(i8* %addr) { + entry: + ret void + } +... + +--- +name: test_zextload +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_zextload + ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr) + ; CHECK: $w0 = COPY [[T1]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_SEXTLOAD %0 :: (load 1 from %ir.addr) + $w0 = COPY %1 +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index 7c84902f3b4e..781b5d8cde81 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -22,50 +22,51 @@ registers: - { id: 6, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_shift - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]] ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C1]] - ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[ASHR1]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C2]] + ; CHECK: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C2]] ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C3]] - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[AND1]] + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C3]] + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND2]] ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: %w0 = COPY [[COPY3]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY1]]0, [[COPY1]]1 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]]2(s32) - ; CHECK: %w0 = COPY [[COPY4]](s32) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C4]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC4]], [[AND3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; CHECK: $w0 = COPY [[COPY4]](s32) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_ASHR %2, %3 %7:_(s32) = G_ANYEXT %4 - %w0 = COPY %7 + $w0 = COPY %7 %5(s8) = G_LSHR %2, %3 %8:_(s32) = G_ANYEXT %5 - %w0 = COPY %8 + $w0 = COPY %8 %6(s8) = G_SHL %2, %3 %9:_(s32) = G_ANYEXT %6 - %w0 = COPY %9 + $w0 = COPY %9 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir index a7329916ea83..3da689d4265c 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -45,46 +45,46 @@ body: | ; CHECK-LABEL: name: test_simple ; CHECK: bb.0.{{[a-zA-Z0-9]+}}: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[INTTOPTR]](p0) - ; CHECK: %x0 = COPY [[PTRTOINT]](s64) + ; CHECK: $x0 = COPY [[PTRTOINT]](s64) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 ; CHECK: bb.1.{{[a-zA-Z0-9]+}}: ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[TRUNC2]], [[TRUNC3]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) - ; CHECK: %w0 = COPY [[COPY1]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[TRUNC4]], [[TRUNC5]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) - ; CHECK: %w0 = COPY [[COPY2]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: 
[[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[TRUNC6]], [[TRUNC7]] ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) - ; CHECK: %w0 = COPY [[COPY3]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[TRUNC1]], [[TRUNC1]] ; CHECK: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[COPY]], [[COPY]] - ; CHECK: %x0 = COPY [[SELECT4]](s64) + ; CHECK: $x0 = COPY [[SELECT4]](s64) ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64) ; CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]](<2 x s32>) - ; CHECK: %x0 = COPY [[BITCAST1]](s64) + ; CHECK: $x0 = COPY [[BITCAST1]](s64) ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SELECT3]](s32) - ; CHECK: %w0 = COPY [[BITCAST2]](s32) + ; CHECK: $w0 = COPY [[BITCAST2]](s32) ; CHECK: [[BITCAST3:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[COPY]](s64) ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST3]](<4 x s8>) - ; CHECK: %w0 = COPY [[BITCAST4]](s32) + ; CHECK: $w0 = COPY [[BITCAST4]](s32) ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s64) ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST5]](<2 x s16>) - ; CHECK: %w0 = COPY [[BITCAST6]](s32) + ; CHECK: $w0 = COPY [[BITCAST6]](s32) bb.0.entry: - liveins: %x0, %x1, %x2, %x3 - %0(s64) = COPY %x0 + liveins: $x0, $x1, $x2, $x3 + %0(s64) = COPY $x0 %1(s1) = G_TRUNC %0 %2(s8) = G_TRUNC %0 @@ -93,7 +93,7 @@ body: | %5(p0) = G_INTTOPTR %0 %6(s64) = G_PTRTOINT %5 - %x0 = COPY %6 + $x0 = COPY %6 G_BRCOND %1, %bb.1 @@ -101,31 +101,31 @@ body: | %7(s1) = G_SELECT %1, %1, %1 %21:_(s32) = G_ANYEXT %7 - %w0 = COPY %21 + $w0 = COPY %21 %8(s8) = G_SELECT %1, %2, %2 %20:_(s32) = G_ANYEXT %8 - %w0 = COPY %20 + $w0 = COPY %20 %9(s16) = G_SELECT %1, %3, %3 %19:_(s32) = G_ANYEXT %9 - %w0 = COPY %19 + $w0 = COPY %19 %10(s32) = G_SELECT %1, %4, %4 %11(s64) = G_SELECT %1, %0, %0 - %x0 = COPY %11 + $x0 = COPY %11 %12(<2 x s32>) = G_BITCAST %0 %13(s64) = G_BITCAST %12 - %x0 = COPY 
%13 + $x0 = COPY %13 %14(s32) = G_BITCAST %10 - %w0 = COPY %14 + $w0 = COPY %14 %15(<4 x s8>) = G_BITCAST %0 %17:_(s32) = G_BITCAST %15 - %w0 = COPY %17 + $w0 = COPY %17 %16(<2 x s16>) = G_BITCAST %0 %18:_(s32) = G_BITCAST %16 - %w0 = COPY %18 + $w0 = COPY %18 ... --- @@ -138,22 +138,22 @@ registers: - { id: 3, class: _} body: | bb.1: - liveins: %x0, %x1 + liveins: $x0, $x1 ; This is legal and shouldn't be changed. ; CHECK-LABEL: name: bitcast128 - ; CHECK: liveins: %x0, %x1 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[MV]](s128) - ; CHECK: %q0 = COPY [[BITCAST]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit %q0 - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $q0 = COPY [[BITCAST]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) %2(<2 x s64>) = G_BITCAST %3(s128) - %q0 = COPY %2(<2 x s64>) - RET_ReallyLR implicit %q0 + $q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit $q0 ... --- @@ -166,19 +166,19 @@ registers: - { id: 3, class: _} body: | bb.1: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: testExtOfCopyOfTrunc - ; CHECK: liveins: %x0 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: %x0 = COPY [[COPY1]](s64) - ; CHECK: RET_ReallyLR implicit %x0 - %0(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[COPY1]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %0(s64) = COPY $x0 %1(s1) = G_TRUNC %0 %2(s1) = COPY %1 %3(s64) = G_ANYEXT %2 - %x0 = COPY %3 - RET_ReallyLR implicit %x0 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 ... 
--- @@ -191,19 +191,19 @@ registers: - { id: 3, class: _} body: | bb.1: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: testExtOf2CopyOfTrunc - ; CHECK: liveins: %x0 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: %x0 = COPY [[COPY1]](s64) - ; CHECK: RET_ReallyLR implicit %x0 - %0(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[COPY1]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %0(s64) = COPY $x0 %1(s1) = G_TRUNC %0 %2(s1) = COPY %1 %4:_(s1) = COPY %2 %3(s64) = G_ANYEXT %4 - %x0 = COPY %3 - RET_ReallyLR implicit %x0 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir index 4baab17f464f..5f50ce047cef 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -21,21 +21,21 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_sub_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[TRUNC1]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SUB]](s32) - ; CHECK: %x0 = COPY [[ANYEXT]](s64) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0(s64) = COPY $x0 + %1(s64) 
= COPY $x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_SUB %2, %3 %5(s64) = G_ANYEXT %4 - %x0 = COPY %5 + $x0 = COPY %5 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir b/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir index 6342fe373efd..022fd13d1789 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_implicit_def @@ -14,5 +14,5 @@ body: | ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[DEF]](s64), [[DEF1]](s64) %0:_(s128) = G_IMPLICIT_DEF %1:_(s64) = G_TRUNC %0 - %x0 = COPY %1 + $x0 = COPY %1 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir b/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir index 85b65e945486..12e7bb489d6e 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -23,6 +23,6 @@ body: | ; CHECK: unable to legalize instruction: {{.*}} G_UNMERGE_VALUES %1(s4), %2(s4)= G_UNMERGE_VALUES %0(s8) %3(s64) = G_ANYEXT %1(s4) - %x0 = COPY %3(s64) + $x0 = COPY %3(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir b/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir index 72dbf083192a..42eb80e45012 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -12,7 +12,7 @@ name: test_vaarg body: | bb.0: ; CHECK-LABEL: name: test_vaarg - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load 8) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[LOAD]], [[C]](s64) @@ -28,7 +28,7 @@ body: | ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[PTR_MASK]], [[C3]](s64) ; CHECK: G_STORE [[GEP3]](p0), [[COPY]](p0) :: (store 8) - %0:_(p0) = COPY %x0 + %0:_(p0) = COPY $x0 %1:_(s8) = G_VAARG %0(p0), 1 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir b/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir index 32cc63028f58..6958d30d3654 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -21,21 +21,21 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 ; CHECK-LABEL: name: test_scalar_xor_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; 
CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) - ; CHECK: %x0 = COPY [[ANYEXT]](s64) - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_XOR %2, %3 %5(s64) = G_ANYEXT %4 - %x0 = COPY %5 + $x0 = COPY %5 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir b/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir new file mode 100644 index 000000000000..66c3f2577360 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir @@ -0,0 +1,24 @@ +# RUN: llc -O0 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_sextload(i8* %addr) { + entry: + ret void + } +... + +--- +name: test_sextload +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: test_sextload + ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr) + ; CHECK: $w0 = COPY [[T1]](s32) + %0:_(p0) = COPY $x0 + %1:_(s32) = G_ZEXTLOAD %0 :: (load 1 from %ir.addr) + $w0 = COPY %1 +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir new file mode 100644 index 000000000000..e31b24d1b2d3 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -0,0 +1,294 @@ +# RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s \ +# RUN: -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK + +# RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s -debug-only=legalizer-info \ +# RUN: -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,DEBUG + +# REQUIRES: asserts + +# The main purpose of this test is to make sure we don't over-relax +# LegalizerInfo validation and lose its ability to catch bugs. +# +# Watch out for every "SKIPPED: user-defined predicate detected" in the +# check-lines below and keep each and every one of them justified. + + +# DEBUG: G_ADD (opcode [[ADD_OPC:[0-9]+]]): 1 type index +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_SUB (opcode [[SUB_OPC:[0-9]+]]): 1 type index +# DEBUG-NEXT: .. opcode [[SUB_OPC]] is aliased to [[ADD_OPC]] +# DEBUG-NEXT: .. opcode [[ADD_OPC]] is aliased to 0 +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_SDIV (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_UDIV (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_SREM (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_UREM (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_AND (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_OR (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. 
the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_XOR (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_IMPLICIT_DEF (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_PHI (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FRAME_INDEX (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_GLOBAL_VALUE (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_EXTRACT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_UNMERGE_VALUES (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_INSERT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_MERGE_VALUES (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_PTRTOINT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_INTTOPTR (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_BITCAST (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_LOAD (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_SEXTLOAD (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ZEXTLOAD (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. 
type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_STORE (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMIC_CMPXCHG (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_XCHG (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_ADD (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_SUB (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_AND (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_NAND (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_ATOMICRMW_OR (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_XOR (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_MAX (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_MIN (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_UMAX (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. 
type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_ATOMICRMW_UMIN (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected +# +# DEBUG-NEXT: G_BRCOND (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_BRINDIRECT (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_INTRINSIC (opcode {{[0-9]+}}): 0 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_ANYEXT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_TRUNC (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_CONSTANT (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FCONSTANT (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_VASTART (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_VAARG (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SEXT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_ZEXT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SHL (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_LSHR (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_ASHR (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. 
the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_ICMP (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_FCMP (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_UADDE (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_USUBE (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SADDO (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SSUBO (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_UMULO (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SMULO (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_UMULH (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_SMULH (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FADD (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FSUB (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FMUL (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FMA (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FDIV (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FREM (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. 
the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FPOW (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_FEXP (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_FEXP2 (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_FLOG (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_FLOG2 (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_FPTOUI (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_SITOFP (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_UITOFP (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_GEP (opcode {{[0-9]+}}): 2 type indices +# DEBUG: .. the first uncovered type index: 2, OK +# +# DEBUG-NEXT: G_PTR_MASK (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK +# +# DEBUG-NEXT: G_BR (opcode {{[0-9]+}}): 0 type indices +# DEBUG: .. 
type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_INSERT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_EXTRACT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 3 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# +# DEBUG-NEXT: G_BSWAP (opcode {{[0-9]+}}): 1 type index +# DEBUG: .. the first uncovered type index: 1, OK + +# CHECK-NOT: ill-defined + +--- +name: dummy +body: | + bb.0: +... diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir index d4ed70fa5316..5d4dad691664 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir @@ -75,10 +75,10 @@ registers: # CHECK-NEXT: G_FADD %0, %2 body: | bb.0 (%ir-block.0): - liveins: %s0, %w0 + liveins: $s0, $w0 - %0(s32) = COPY %s0 - %6(s32) = COPY %w0 + %0(s32) = COPY $s0 + %6(s32) = COPY $w0 %1(s1) = G_TRUNC %6 %4(s32) = G_FCONSTANT float 1.000000e+00 %5(s32) = G_FCONSTANT float 2.000000e+00 @@ -93,7 +93,7 @@ body: | bb.3.end: %2(s32) = PHI %4(s32), %bb.1, %5(s32), %bb.2 %3(s32) = G_FADD %0, %2 - %s0 = COPY %3(s32) - RET_ReallyLR implicit %s0 + $s0 = COPY %3(s32) + RET_ReallyLR implicit $s0 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/localizer.mir b/test/CodeGen/AArch64/GlobalISel/localizer.mir index 5de006a7d3fa..33a1cf3112e1 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=localizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK +# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=localizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK # Test the localizer. @@ -274,8 +274,8 @@ body: | ; CHECK-LABEL: name: non_local_label ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: %s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: liveins: $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) @@ -288,10 +288,10 @@ body: | ; The newly created reg should be on the same regbank/regclass as its origin. 
bb.0: - liveins: %s0 + liveins: $s0 successors: %bb.1 - %0:fpr(s32) = COPY %s0 + %0:fpr(s32) = COPY $s0 %1:fpr(s32) = G_FCONSTANT float 1.0 bb.1: diff --git a/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir new file mode 100644 index 000000000000..8ca81a3bd403 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir @@ -0,0 +1,280 @@ +# RUN: llc -run-pass machine-cse -verify-machineinstrs -mtriple aarch64-apple-ios %s -o - | FileCheck %s +--- +name: irtranslated +legalized: false +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: irtranslated + ; CHECK: %[[ONE:[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:_(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: $[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $[[RET]] + bb.0: + %0:_(s32) = G_CONSTANT i32 1 + %1:_(s32) = G_ADD %0, %0 + %2:_(s32) = G_ADD %0, %0 + %3:_(s32) = G_ADD %1, %2 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 +... +--- +name: regbankselected +legalized: true +regBankSelected: true +selected: false +body: | + ; CHECK-LABEL: name: regbankselected + ; CHECK: %[[ONE:[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:gpr(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:gpr(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: $[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $[[RET]] + bb.0: + %0:gpr(s32) = G_CONSTANT i32 1 + %1:gpr(s32) = G_ADD %0, %0 + %2:gpr(s32) = G_ADD %0, %0 + %3:gpr(s32) = G_ADD %1, %2 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: legalized +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: legalized + ; CHECK: %[[ONE:[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:gpr(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: $[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $[[RET]] + bb.0: + %0:_(s32) = G_CONSTANT i32 1 + %1:_(s32) = G_ADD %0, %0 + %2:gpr(s32) = G_ADD %0, %0 + %3:_(s32) = G_ADD %1, %2 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 +... +--- +name: legalized_sym +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: legalized_sym + ; CHECK: %[[ONE:[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:gpr(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: $[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $[[RET]] + bb.0: + %0:_(s32) = G_CONSTANT i32 1 + %1:gpr(s32) = G_ADD %0, %0 + %2:_(s32) = G_ADD %0, %0 + %3:_(s32) = G_ADD %1, %2 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: int_extensions +alignment: 2 +legalized: false +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: int_extensions + ; CHECK: %[[ONE:[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK-NEXT: %[[S16:[0-9]+]]:_(s16) = G_SEXT %[[ONE]](s8) + ; CHECK-NEXT: %[[S32:[0-9]+]]:_(s32) = G_SEXT %[[ONE]](s8) + ; CHECK-NEXT: %[[S16_Z64:[0-9]+]]:_(s64) = G_ZEXT %[[S16]](s16) + ; CHECK-NEXT: %[[S32_Z64:[0-9]+]]:_(s64) = G_ZEXT %[[S32]](s32) + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s64) = G_ADD %[[S16_Z64]], %[[S32_Z64]] + ; CHECK-NEXT: $[[RET:[wx][0-9]+]] = COPY %[[SUM]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $[[RET]] + bb.0.entry: + %0:_(s8) = G_CONSTANT i8 1 + %1:_(s16) = G_SEXT %0(s8) + %2:_(s32) = G_SEXT %0(s8) + %3:_(s64) = G_ZEXT %1(s16) + %4:_(s64) = G_ZEXT %2(s32) + %5:_(s64) = G_ADD %3, %4 + $x0 = COPY %5(s64) + RET_ReallyLR implicit $x0 +... +--- +name: generic +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: generic + ; CHECK: %[[SG:[0-9]+]]:_(s32) = G_ADD %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %{{[0-9]+}}:_(s32) = G_ADD %[[SG]], %[[SG]] + bb.0: + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = COPY %2(s32) + %4:_(s32) = G_ADD %3, %3 + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 +... +--- +name: generic_to_concrete_copy +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: generic_to_concrete_copy + ; CHECK: %[[S1:[0-9]+]]:_(s32) = G_ADD %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %[[S2:[0-9]+]]:gpr32 = COPY %[[S1]](s32) + ; CHECK-NEXT: %{{[0-9]+}}:gpr32 = ADDWrr %[[S2]], %[[S2]] + bb.0: + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_ADD %0, %1 + %3:gpr32 = COPY %2(s32) + %4:gpr32 = ADDWrr %3, %3 + $w0 = COPY %4 + RET_ReallyLR implicit $w0 +... 
+--- +name: concrete_to_generic_copy +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: concrete_to_generic_copy + ; CHECK: %[[S1:[0-9]+]]:gpr32 = ADDWrr %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %[[S2:[0-9]+]]:_(s32) = COPY %[[S1]] + ; CHECK-NEXT: %{{[0-9]+}}:_(s32) = G_ADD %[[S2]], %[[S2]] + bb.0: + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + %2:gpr32 = ADDWrr %0, %1 + %3:_(s32) = COPY %2 + %4:_(s32) = G_ADD %3, %3 + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 +... +--- +name: concrete +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: concrete + ; CHECK: %[[SC:[0-9]+]]:gpr32 = ADDWrr %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %{{[0-9]+}}:gpr32 = ADDWrr %[[SC]], %[[SC]] + bb.0: + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + %2:gpr32 = ADDWrr %0, %1 + %3:gpr32 = COPY %2 + %4:gpr32 = ADDWrr %3, %3 + $w0 = COPY %4 + RET_ReallyLR implicit $w0 +... +--- +name: variadic_defs_unmerge_vector +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: variadic_defs_unmerge_vector + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[UV0:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s32) = G_ANYEXT [[UV0]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK-NEXT: [[ADD0:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT0]], [[ANYEXT1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD0]], [[ADD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + %0 :_(<4 x s16>) = COPY $d0 + %1 :_(s16), %2 :_(s16), %3 :_(s16), %4 :_(s16) = G_UNMERGE_VALUES %0(<4 x s16>) + %5 :_(s16), %6 
:_(s16), %7 :_(s16), %8 :_(s16) = G_UNMERGE_VALUES %0(<4 x s16>) + %9 :_(s16), %10:_(s16), %11:_(s16), %12:_(s16) = G_UNMERGE_VALUES %0(<4 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %0(<4 x s16>) + %17:_(s32) = G_ANYEXT %1 (s16) + %18:_(s32) = G_ANYEXT %6 (s16) + %19:_(s32) = G_ANYEXT %11(s16) + %20:_(s32) = G_ANYEXT %16(s16) + %21:_(s32) = G_ADD %17, %18 + %22:_(s32) = G_ADD %19, %20 + %23:_(s32) = G_ADD %21, %22 + $w0 = COPY %23(s32) + RET_ReallyLR implicit $w0 +... +--- +name: variadic_defs_unmerge_scalar +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: variadic_defs_unmerge_scalar + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[UV0:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s32) = G_ANYEXT [[UV0]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK-NEXT: [[ADD0:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT0]], [[ANYEXT1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD0]], [[ADD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + %0 :_(s64) = COPY $d0 + %1 :_(s16), %2 :_(s16), %3 :_(s16), %4 :_(s16) = G_UNMERGE_VALUES %0(s64) + %5 :_(s16), %6 :_(s16), %7 :_(s16), %8 :_(s16) = G_UNMERGE_VALUES %0(s64) + %9 :_(s16), %10:_(s16), %11:_(s16), %12:_(s16) = G_UNMERGE_VALUES %0(s64) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %0(s64) + %17:_(s32) = G_ANYEXT %1 (s16) + %18:_(s32) = G_ANYEXT %6 (s16) + %19:_(s32) = G_ANYEXT %11(s16) + %20:_(s32) = G_ANYEXT %16(s16) + %21:_(s32) = G_ADD %17, %18 + %22:_(s32) = G_ADD %19, %20 + %23:_(s32) = G_ADD %21, %22 + $w0 = COPY %23(s32) 
+ RET_ReallyLR implicit $w0 +... +--- +name: variadic_defs_unmerge_scalar_asym +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: variadic_defs_unmerge_scalar_asym + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[UV0:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV01:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s32) = G_ANYEXT [[UV0]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK-NEXT: [[ADD0:%[0-9]+]]:_(s32) = G_ADD [[ANYEXT0]], [[ANYEXT1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV01]], [[UV23]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD0]], [[ADD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + %0 :_(s64) = COPY $d0 + %1 :_(s16), %2 :_(s16), %3 :_(s16), %4 :_(s16) = G_UNMERGE_VALUES %0(s64) + %9 :_(s32), %10:_(s32) = G_UNMERGE_VALUES %0(s64) + %5 :_(s16), %6 :_(s16), %7 :_(s16), %8 :_(s16) = G_UNMERGE_VALUES %0(s64) + %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %0(s64) + %17:_(s32) = G_ANYEXT %1 (s16) + %18:_(s32) = G_ANYEXT %6 (s16) + %21:_(s32) = G_ADD %17, %18 + %22:_(s32) = G_ADD %9, %12 + %23:_(s32) = G_ADD %21, %22 + $w0 = COPY %23(s32) + RET_ReallyLR implicit $w0 +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/no-regclass.mir b/test/CodeGen/AArch64/GlobalISel/no-regclass.mir index 8732274fe034..2c546ac27d3b 100644 --- a/test/CodeGen/AArch64/GlobalISel/no-regclass.mir +++ b/test/CodeGen/AArch64/GlobalISel/no-regclass.mir @@ -19,13 +19,13 @@ tracksRegLiveness: true body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: unused_reg - ; CHECK: liveins: %w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 - ; CHECK: %w0 = COPY [[COPY]] - %0:gpr(s32) = COPY %w0 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY $w0 + ; CHECK: $w0 = COPY [[COPY]] + %0:gpr(s32) = COPY $w0 %1:gpr(s64) = G_MERGE_VALUES %0(s32), %0(s32) %2:gpr(s32), %3:gpr(s32) = G_UNMERGE_VALUES %1(s64) - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir b/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir index b675389fd5b3..401646a2e55e 100644 --- a/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir +++ b/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir @@ -1,11 +1,11 @@ -# RUN: llc -mtriple=aarch64 -global-isel -run-pass=regbankselect -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -o - %s | FileCheck %s --- name: test_large_merge legalized: true registers: body: | bb.0.entry: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: test_large_merge ; CHECK: registers: @@ -13,10 +13,10 @@ body: | ; CHECK: - { id: 1, class: gpr ; CHECK: - { id: 2, class: gpr ; CHECK: - { id: 3, class: fpr - %0:_(s64) = COPY %x0 - %1:_(s64) = COPY %x1 - %2:_(p0) = COPY %x2 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(p0) = COPY $x2 %3:_(s128) = G_MERGE_VALUES %0, %1 %4:_(s64) = G_TRUNC %3 - %d0 = COPY %4 + $d0 = COPY %4 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir index 201565c675af..1528a8097710 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -19,7 +19,7 @@ !2 = !{} !3 = !{i32 2, !"Dwarf Version", i32 4} !4 = !{i32 2, !"Debug Info Version", i32 3} - !5 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) + !5 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !6 = !DISubroutineType(types: !2) !7 = !DILocalVariable(name: "in", arg: 1, scope: !5, file: !1, line: 1, type: !8) !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) @@ -34,11 +34,11 @@ legalized: true # CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } body: | bb.0: - liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK: DBG_VALUE debug-use %0(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 - DBG_VALUE debug-use %0(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 + liveins: $w0 + %0:_(s32) = COPY $w0 + ; CHECK: DBG_VALUE debug-use %0(s32), debug-use $noreg, !7, !DIExpression(), debug-location !9 + DBG_VALUE debug-use %0(s32), debug-use $noreg, !7, !DIExpression(), debug-location !9 - ; CHECK: DBG_VALUE %noreg, 0, !7, !DIExpression(), debug-location !9 - DBG_VALUE %noreg, 0, !7, !DIExpression(), debug-location !9 + ; CHECK: DBG_VALUE 
$noreg, 0, !7, !DIExpression(), debug-location !9 + DBG_VALUE $noreg, 0, !7, !DIExpression(), debug-location !9 ... diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir index df40a7f659ac..dc124096190f 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect %s -o - | FileCheck %s # Check the default mappings for various instructions. @@ -80,11 +80,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_add_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_ADD %0, %0 ... @@ -96,11 +96,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_add_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[ADD:%[0-9]+]]:fpr(<4 x s32>) = G_ADD [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_ADD %0, %0 ... @@ -112,11 +112,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sub_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[SUB:%[0-9]+]]:gpr(s32) = G_SUB [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_SUB %0, %0 ... 
@@ -128,11 +128,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_sub_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[SUB:%[0-9]+]]:fpr(<4 x s32>) = G_SUB [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_SUB %0, %0 ... @@ -144,11 +144,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_mul_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[MUL:%[0-9]+]]:gpr(s32) = G_MUL [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_MUL %0, %0 ... @@ -160,11 +160,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_mul_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[MUL:%[0-9]+]]:fpr(<4 x s32>) = G_MUL [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_MUL %0, %0 ... @@ -176,11 +176,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_and_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_AND %0, %0 ... @@ -192,11 +192,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_and_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[AND:%[0-9]+]]:fpr(<4 x s32>) = G_AND [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_AND %0, %0 ... 
@@ -208,11 +208,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_or_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[OR:%[0-9]+]]:gpr(s32) = G_OR [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_OR %0, %0 ... @@ -224,11 +224,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_or_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[OR:%[0-9]+]]:fpr(<4 x s32>) = G_OR [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_OR %0, %0 ... @@ -240,11 +240,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_xor_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[XOR:%[0-9]+]]:gpr(s32) = G_XOR [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_XOR %0, %0 ... @@ -256,11 +256,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_xor_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[XOR:%[0-9]+]]:fpr(<4 x s32>) = G_XOR [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_XOR %0, %0 ... @@ -272,11 +272,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_shl_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[SHL:%[0-9]+]]:gpr(s32) = G_SHL [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_SHL %0, %0 ... 
@@ -288,11 +288,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %q0 + liveins: $q0 ; CHECK-LABEL: name: test_shl_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 ; CHECK: [[SHL:%[0-9]+]]:fpr(<4 x s32>) = G_SHL [[COPY]], [[COPY]] - %0(<4 x s32>) = COPY %q0 + %0(<4 x s32>) = COPY $q0 %1(<4 x s32>) = G_SHL %0, %0 ... @@ -304,11 +304,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_lshr_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[LSHR:%[0-9]+]]:gpr(s32) = G_LSHR [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_LSHR %0, %0 ... @@ -320,11 +320,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_ashr_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[ASHR:%[0-9]+]]:gpr(s32) = G_ASHR [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_ASHR %0, %0 ... @@ -336,11 +336,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sdiv_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[SDIV:%[0-9]+]]:gpr(s32) = G_SDIV [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_SDIV %0, %0 ... @@ -352,11 +352,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_udiv_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[UDIV:%[0-9]+]]:gpr(s32) = G_UDIV [[COPY]], [[COPY]] - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_UDIV %0, %0 ... 
@@ -368,11 +368,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_anyext_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[ANYEXT:%[0-9]+]]:gpr(s64) = G_ANYEXT [[COPY]](s32) - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s64) = G_ANYEXT %0 ... @@ -384,11 +384,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sext_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[SEXT:%[0-9]+]]:gpr(s64) = G_SEXT [[COPY]](s32) - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s64) = G_SEXT %0 ... @@ -400,11 +400,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_zext_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[ZEXT:%[0-9]+]]:gpr(s64) = G_ZEXT [[COPY]](s32) - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s64) = G_ZEXT %0 ... @@ -416,11 +416,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_trunc_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s32) = G_TRUNC [[COPY]](s64) - %0(s64) = COPY %x0 + %0(s64) = COPY $x0 %1(s32) = G_TRUNC %0 ... @@ -457,12 +457,12 @@ registers: - { id: 2, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_icmp_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_ICMP intpred(ne), %0, %0 %2(s1) = G_TRUNC %1(s32) ... 
@@ -476,12 +476,12 @@ registers: - { id: 2, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_icmp_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) - %0(p0) = COPY %x0 + %0(p0) = COPY $x0 %1(s32) = G_ICMP intpred(ne), %0, %0 %2(s1) = G_TRUNC %1(s32) ... @@ -508,11 +508,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_ptrtoint_s64_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 ; CHECK: [[PTRTOINT:%[0-9]+]]:gpr(s64) = G_PTRTOINT [[COPY]](p0) - %0(p0) = COPY %x0 + %0(p0) = COPY $x0 %1(s64) = G_PTRTOINT %0 ... @@ -524,11 +524,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_inttoptr_p0_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[COPY]](s64) - %0(s64) = COPY %x0 + %0(s64) = COPY $x0 %1(p0) = G_INTTOPTR %0 ... @@ -540,11 +540,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_load_s32_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) - %0(p0) = COPY %x0 + %0(p0) = COPY $x0 %1(s32) = G_LOAD %0 :: (load 4) ... 
@@ -556,13 +556,13 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: test_store_s32_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1 ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4) - %0(p0) = COPY %x0 - %1(s32) = COPY %w1 + %0(p0) = COPY $x0 + %1(s32) = COPY $w1 G_STORE %1, %0 :: (store 4) ... @@ -574,11 +574,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fadd_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FADD:%[0-9]+]]:fpr(s32) = G_FADD [[COPY]], [[COPY]] - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_FADD %0, %0 ... @@ -590,11 +590,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fsub_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FSUB:%[0-9]+]]:fpr(s32) = G_FSUB [[COPY]], [[COPY]] - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_FSUB %0, %0 ... @@ -606,11 +606,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fmul_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FMUL:%[0-9]+]]:fpr(s32) = G_FMUL [[COPY]], [[COPY]] - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_FMUL %0, %0 ... @@ -622,11 +622,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fdiv_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FDIV:%[0-9]+]]:fpr(s32) = G_FDIV [[COPY]], [[COPY]] - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_FDIV %0, %0 ... 
@@ -638,11 +638,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fpext_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FPEXT:%[0-9]+]]:fpr(s64) = G_FPEXT [[COPY]](s32) - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s64) = G_FPEXT %0 ... @@ -654,11 +654,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: test_fptrunc_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 ; CHECK: [[FPTRUNC:%[0-9]+]]:fpr(s32) = G_FPTRUNC [[COPY]](s64) - %0(s64) = COPY %d0 + %0(s64) = COPY $d0 %1(s32) = G_FPTRUNC %0 ... @@ -683,12 +683,12 @@ registers: - { id: 2, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fcmp_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(olt), [[COPY]](s32), [[COPY]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[FCMP]](s32) - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_FCMP floatpred(olt), %0, %0 %2(s1) = G_TRUNC %1(s32) ... @@ -701,11 +701,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_sitofp_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s64) = G_SITOFP [[COPY]](s32) - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s64) = G_SITOFP %0 ... @@ -717,11 +717,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: test_uitofp_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 ; CHECK: [[UITOFP:%[0-9]+]]:fpr(s32) = G_UITOFP [[COPY]](s64) - %0(s64) = COPY %x0 + %0(s64) = COPY $x0 %1(s32) = G_UITOFP %0 ... 
@@ -733,11 +733,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: test_fptosi_s64_s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[FPTOSI:%[0-9]+]]:gpr(s64) = G_FPTOSI [[COPY]](s32) - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s64) = G_FPTOSI %0 ... @@ -749,11 +749,11 @@ registers: - { id: 1, class: _ } body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: test_fptoui_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 ; CHECK: [[FPTOUI:%[0-9]+]]:gpr(s32) = G_FPTOUI [[COPY]](s64) - %0(s64) = COPY %d0 + %0(s64) = COPY $d0 %1(s32) = G_FPTOUI %0 ... @@ -772,10 +772,10 @@ body: | ; CHECK-LABEL: name: test_gphi_ptr ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: %w2, %x0, %x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY %x1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr(s32) = COPY %w2 + ; CHECK: liveins: $w2, $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr(s32) = COPY $w2 ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY2]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 ; CHECK: G_BR %bb.2 @@ -783,15 +783,15 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:gpr(p0) = G_PHI [[COPY]](p0), %bb.0, [[COPY1]](p0), %bb.1 - ; CHECK: %x0 = COPY [[PHI]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[PHI]](p0) + ; CHECK: RET_ReallyLR implicit $x0 bb.0: successors: %bb.1, %bb.2 - liveins: %w2, %x0, %x1 + liveins: $w2, $x0, $x1 - %0(p0) = COPY %x0 - %1(p0) = COPY %x1 - %4(s32) = COPY %w2 + %0(p0) = COPY $x0 + %1(p0) = COPY $x1 + %4(s32) = COPY $w2 %2(s1) = G_TRUNC %4(s32) G_BRCOND %2(s1), %bb.1 G_BR %bb.2 @@ -802,7 +802,7 @@ body: | bb.2: %3(p0) = G_PHI %0(p0), %bb.0, %1(p0), %bb.1 - %x0 
= COPY %3(p0) - RET_ReallyLR implicit %x0 + $x0 = COPY %3(p0) + RET_ReallyLR implicit $x0 ... diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-reg_sequence.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-reg_sequence.mir index 15ccf1f5459c..32a0d4335c74 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-reg_sequence.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-reg_sequence.mir @@ -18,8 +18,8 @@ registers: - { id: 0, class: dd } body: | bb.0: - liveins: %d0, %d1 + liveins: $d0, $d1 - %0 = REG_SEQUENCE %d0, %subreg.dsub0, %d1, %subreg.dsub1 + %0 = REG_SEQUENCE $d0, %subreg.dsub0, $d1, %subreg.dsub1 ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir b/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir index cab5489ab6f4..eafc8782986d 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -25,17 +25,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_xchg_i64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr64 = MOVi64imm 1 ; CHECK: [[RES:%[0-9]+]]:gpr64 = SWPX [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) - ; CHECK: %x0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 1 %2:gpr(s64) = G_ATOMICRMW_XCHG %0, %1 :: (load store monotonic 8 on %ir.addr) - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
--- name: atomicrmw_add_i64 @@ -44,17 +44,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_add_i64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr64 = MOVi64imm 1 ; CHECK: [[RES:%[0-9]+]]:gpr64 = LDADDX [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) - ; CHECK: %x0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 1 %2:gpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 8 on %ir.addr) - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- name: atomicrmw_add_i32 @@ -63,17 +63,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_add_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDADDALW [[CST]], [[COPY]] :: (load store seq_cst 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDADDALW [[CST]], [[COPY]] :: (load store seq_cst 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4 on %ir.addr) + $w0 = COPY %2(s32) ... 
--- @@ -83,17 +83,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_sub_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDADDALW [[CST]], [[COPY]] :: (load store seq_cst 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDADDALW [[CST]], [[COPY]] :: (load store seq_cst 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4 on %ir.addr) + $w0 = COPY %2(s32) ... --- @@ -103,18 +103,18 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_and_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CST2:%[0-9]+]]:gpr32 = ORNWrr %wzr, [[CST]] - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDCLRAW [[CST2]], [[COPY]] :: (load store acquire 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[CST2:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[CST]] + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDCLRAW [[CST2]], [[COPY]] :: (load store acquire 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_AND %0, %1 :: (load store acquire 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_AND %0, %1 :: (load store acquire 4 on %ir.addr) + $w0 = COPY %2(s32) ... 
--- @@ -124,17 +124,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_or_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSETLW [[CST]], [[COPY]] :: (load store release 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSETLW [[CST]], [[COPY]] :: (load store release 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_OR %0, %1 :: (load store release 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_OR %0, %1 :: (load store release 4 on %ir.addr) + $w0 = COPY %2(s32) ... --- @@ -144,17 +144,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_xor_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDEORALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDEORALW [[CST]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store acq_rel 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store acq_rel 4 on %ir.addr) + $w0 = COPY %2(s32) ... 
--- @@ -164,17 +164,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_min_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSMINALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSMINALW [[CST]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store acq_rel 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store acq_rel 4 on %ir.addr) + $w0 = COPY %2(s32) ... --- @@ -184,17 +184,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_max_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSMAXALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSMAXALW [[CST]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store acq_rel 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store acq_rel 4 on %ir.addr) + $w0 = COPY %2(s32) ... 
--- @@ -204,17 +204,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_umin_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDUMINALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDUMINALW [[CST]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store acq_rel 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store acq_rel 4 on %ir.addr) + $w0 = COPY %2(s32) ... --- @@ -224,15 +224,15 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: atomicrmw_umax_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDUMAXALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDUMAXALW [[CST]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store acq_rel 8 on %ir.addr) - %w0 = COPY %2(s32) + %2:gpr(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store acq_rel 4 on %ir.addr) + $w0 = COPY %2(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/test/CodeGen/AArch64/GlobalISel/select-binop.mir index 1badcf35492d..f836a149fb22 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-binop.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-binop.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -70,17 +70,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: add_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[ADDWrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[ADDWrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_ADD %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -96,17 +96,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: add_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[ADDXrr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ADDXrr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_ADD %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
--- @@ -121,16 +121,16 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: add_imm_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 1, 0 - ; CHECK: %w0 = COPY [[ADDWri]] - %0(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[ADDWri]] + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 1 %2(s32) = G_ADD %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -145,16 +145,16 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: add_imm_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 1, 0 - ; CHECK: %x0 = COPY [[ADDXri]] - %0(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[ADDXri]] + %0(s64) = COPY $x0 %1(s64) = G_CONSTANT i32 1 %2(s64) = G_ADD %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -171,22 +171,22 @@ body: | ; CHECK-LABEL: name: add_imm_s32_gpr_bb ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 1, 0 - ; CHECK: %w0 = COPY [[ADDWri]] + ; CHECK: $w0 = COPY [[ADDWri]] bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 successors: %bb.1 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 1 G_BR %bb.1 bb.1: %2(s32) = G_ADD %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
--- @@ -202,17 +202,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: sub_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 - ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def %nzcv - ; CHECK: %w0 = COPY [[SUBSWrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK: $w0 = COPY [[SUBSWrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_SUB %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -228,17 +228,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: sub_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 - ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def %nzcv - ; CHECK: %x0 = COPY [[SUBSXrr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK: $x0 = COPY [[SUBSXrr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_SUB %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -254,17 +254,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: or_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[ORRWrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[ORRWrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_OR %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
--- @@ -280,17 +280,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: or_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ORRXrr:%[0-9]+]]:gpr64 = ORRXrr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[ORRXrr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ORRXrr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_OR %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -308,17 +308,17 @@ registers: # on 64-bit width vector. body: | bb.0: - liveins: %d0, %d1 + liveins: $d0, $d1 ; CHECK-LABEL: name: or_v2s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[ORRv8i8_:%[0-9]+]]:fpr64 = ORRv8i8 [[COPY]], [[COPY1]] - ; CHECK: %d0 = COPY [[ORRv8i8_]] - %0(<2 x s32>) = COPY %d0 - %1(<2 x s32>) = COPY %d1 + ; CHECK: $d0 = COPY [[ORRv8i8_]] + %0(<2 x s32>) = COPY $d0 + %1(<2 x s32>) = COPY $d1 %2(<2 x s32>) = G_OR %0, %1 - %d0 = COPY %2(<2 x s32>) + $d0 = COPY %2(<2 x s32>) ... --- @@ -334,17 +334,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: and_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[ANDWrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[ANDWrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_AND %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
--- @@ -360,17 +360,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: and_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ANDXrr:%[0-9]+]]:gpr64 = ANDXrr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[ANDXrr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ANDXrr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_AND %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -386,17 +386,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: shl_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[LSLVWr:%[0-9]+]]:gpr32 = LSLVWr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[LSLVWr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[LSLVWr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_SHL %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -412,17 +412,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: shl_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[LSLVXr:%[0-9]+]]:gpr64 = LSLVXr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[LSLVXr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[LSLVXr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_SHL %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
--- @@ -438,17 +438,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: lshr_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[LSRVWr:%[0-9]+]]:gpr32 = LSRVWr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[LSRVWr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[LSRVWr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_LSHR %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -464,17 +464,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: lshr_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[LSRVXr:%[0-9]+]]:gpr64 = LSRVXr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[LSRVXr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[LSRVXr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_LSHR %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -490,17 +490,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: ashr_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[ASRVWr:%[0-9]+]]:gpr32 = ASRVWr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[ASRVWr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[ASRVWr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_ASHR %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
--- @@ -516,17 +516,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: ashr_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ASRVXr:%[0-9]+]]:gpr64 = ASRVXr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[ASRVXr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[ASRVXr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_ASHR %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -543,17 +543,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: mul_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 - ; CHECK: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], %wzr - ; CHECK: %w0 = COPY [[MADDWrrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK: $w0 = COPY [[MADDWrrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_MUL %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -569,17 +569,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: mul_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 - ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY]], [[COPY1]], %xzr - ; CHECK: %x0 = COPY [[MADDXrrr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY]], [[COPY1]], $xzr + ; CHECK: $x0 = COPY [[MADDXrrr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_MUL %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
--- @@ -591,21 +591,21 @@ regBankSelected: true body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: mulh_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[SMULHrr:%[0-9]+]]:gpr64 = SMULHrr [[COPY]], [[COPY1]] ; CHECK: [[UMULHrr:%[0-9]+]]:gpr64 = UMULHrr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[SMULHrr]] - ; CHECK: %x0 = COPY [[UMULHrr]] - %0:gpr(s64) = COPY %x0 - %1:gpr(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[SMULHrr]] + ; CHECK: $x0 = COPY [[UMULHrr]] + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 %2:gpr(s64) = G_SMULH %0, %1 %3:gpr(s64) = G_UMULH %0, %1 - %x0 = COPY %2(s64) - %x0 = COPY %3(s64) + $x0 = COPY %2(s64) + $x0 = COPY %3(s64) ... --- @@ -621,17 +621,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: sdiv_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[SDIVWr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[SDIVWr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_SDIV %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
--- @@ -647,17 +647,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: sdiv_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[SDIVXr:%[0-9]+]]:gpr64 = SDIVXr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[SDIVXr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[SDIVXr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_SDIV %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -673,17 +673,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: udiv_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[UDIVWr:%[0-9]+]]:gpr32 = UDIVWr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[UDIVWr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[UDIVWr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_UDIV %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -699,17 +699,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: udiv_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[UDIVXr:%[0-9]+]]:gpr64 = UDIVXr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[UDIVXr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[UDIVXr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_UDIV %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
--- @@ -725,17 +725,17 @@ registers: body: | bb.0: - liveins: %s0, %s1 + liveins: $s0, $s1 ; CHECK-LABEL: name: fadd_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[COPY]], [[COPY1]] - ; CHECK: %s0 = COPY [[FADDSrr]] - %0(s32) = COPY %s0 - %1(s32) = COPY %s1 + ; CHECK: $s0 = COPY [[FADDSrr]] + %0(s32) = COPY $s0 + %1(s32) = COPY $s1 %2(s32) = G_FADD %0, %1 - %s0 = COPY %2(s32) + $s0 = COPY %2(s32) ... --- @@ -750,17 +750,17 @@ registers: body: | bb.0: - liveins: %d0, %d1 + liveins: $d0, $d1 ; CHECK-LABEL: name: fadd_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[COPY]], [[COPY1]] - ; CHECK: %d0 = COPY [[FADDDrr]] - %0(s64) = COPY %d0 - %1(s64) = COPY %d1 + ; CHECK: $d0 = COPY [[FADDDrr]] + %0(s64) = COPY $d0 + %1(s64) = COPY $d1 %2(s64) = G_FADD %0, %1 - %d0 = COPY %2(s64) + $d0 = COPY %2(s64) ... --- @@ -775,17 +775,17 @@ registers: body: | bb.0: - liveins: %s0, %s1 + liveins: $s0, $s1 ; CHECK-LABEL: name: fsub_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[FSUBSrr:%[0-9]+]]:fpr32 = FSUBSrr [[COPY]], [[COPY1]] - ; CHECK: %s0 = COPY [[FSUBSrr]] - %0(s32) = COPY %s0 - %1(s32) = COPY %s1 + ; CHECK: $s0 = COPY [[FSUBSrr]] + %0(s32) = COPY $s0 + %1(s32) = COPY $s1 %2(s32) = G_FSUB %0, %1 - %s0 = COPY %2(s32) + $s0 = COPY %2(s32) ... 
--- @@ -800,17 +800,17 @@ registers: body: | bb.0: - liveins: %d0, %d1 + liveins: $d0, $d1 ; CHECK-LABEL: name: fsub_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[FSUBDrr:%[0-9]+]]:fpr64 = FSUBDrr [[COPY]], [[COPY1]] - ; CHECK: %d0 = COPY [[FSUBDrr]] - %0(s64) = COPY %d0 - %1(s64) = COPY %d1 + ; CHECK: $d0 = COPY [[FSUBDrr]] + %0(s64) = COPY $d0 + %1(s64) = COPY $d1 %2(s64) = G_FSUB %0, %1 - %d0 = COPY %2(s64) + $d0 = COPY %2(s64) ... --- @@ -825,17 +825,17 @@ registers: body: | bb.0: - liveins: %s0, %s1 + liveins: $s0, $s1 ; CHECK-LABEL: name: fmul_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY]], [[COPY1]] - ; CHECK: %s0 = COPY [[FMULSrr]] - %0(s32) = COPY %s0 - %1(s32) = COPY %s1 + ; CHECK: $s0 = COPY [[FMULSrr]] + %0(s32) = COPY $s0 + %1(s32) = COPY $s1 %2(s32) = G_FMUL %0, %1 - %s0 = COPY %2(s32) + $s0 = COPY %2(s32) ... --- @@ -850,17 +850,17 @@ registers: body: | bb.0: - liveins: %d0, %d1 + liveins: $d0, $d1 ; CHECK-LABEL: name: fmul_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[COPY]], [[COPY1]] - ; CHECK: %d0 = COPY [[FMULDrr]] - %0(s64) = COPY %d0 - %1(s64) = COPY %d1 + ; CHECK: $d0 = COPY [[FMULDrr]] + %0(s64) = COPY $d0 + %1(s64) = COPY $d1 %2(s64) = G_FMUL %0, %1 - %d0 = COPY %2(s64) + $d0 = COPY %2(s64) ... 
--- @@ -875,17 +875,17 @@ registers: body: | bb.0: - liveins: %s0, %s1 + liveins: $s0, $s1 ; CHECK-LABEL: name: fdiv_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[FDIVSrr:%[0-9]+]]:fpr32 = FDIVSrr [[COPY]], [[COPY1]] - ; CHECK: %s0 = COPY [[FDIVSrr]] - %0(s32) = COPY %s0 - %1(s32) = COPY %s1 + ; CHECK: $s0 = COPY [[FDIVSrr]] + %0(s32) = COPY $s0 + %1(s32) = COPY $s1 %2(s32) = G_FDIV %0, %1 - %s0 = COPY %2(s32) + $s0 = COPY %2(s32) ... --- @@ -900,15 +900,15 @@ registers: body: | bb.0: - liveins: %d0, %d1 + liveins: $d0, $d1 ; CHECK-LABEL: name: fdiv_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[FDIVDrr:%[0-9]+]]:fpr64 = FDIVDrr [[COPY]], [[COPY1]] - ; CHECK: %d0 = COPY [[FDIVDrr]] - %0(s64) = COPY %d0 - %1(s64) = COPY %d1 + ; CHECK: $d0 = COPY [[FDIVDrr]] + %0(s64) = COPY $d0 + %1(s64) = COPY $d1 %2(s64) = G_FDIV %0, %1 - %d0 = COPY %2(s64) + $d0 = COPY %2(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast-bigendian.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast-bigendian.mir index 35d39c8c5d41..03cc505b3e15 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-bitcast-bigendian.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast-bigendian.mir @@ -1,4 +1,4 @@ -# RUN: llc -O0 -mtriple=arm64eb-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=arm64eb-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- name: bitcast_v2f32_to_s64 legalized: true @@ -6,14 +6,13 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: bitcast_v2f32_to_s64 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]] - ; CHECK: [[REV:%[0-9]+]]:fpr64 = REV64v2i32 [[COPY1]] - ; CHECK: %x0 = COPY [[REV]] - %0:fpr(<2 x s32>) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $x0 + ; CHECK: [[REV:%[0-9]+]]:fpr64 = REV64v2i32 [[COPY]] + ; CHECK: $x0 = COPY [[REV]] + %0:fpr(<2 x s32>) = COPY $x0 %1:fpr(s64) = G_BITCAST %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir index e323aa310d5c..0846f54289d7 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -26,15 +26,14 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: bitcast_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]] - ; CHECK: %w0 = COPY [[COPY1]] - %0(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY $w0 + ; CHECK: $w0 = COPY [[COPY]] + %0(s32) = COPY $w0 %1(s32) = G_BITCAST %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -48,15 +47,14 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: bitcast_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]] - ; CHECK: %s0 = COPY [[COPY1]] - %0(s32) = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: $s0 = COPY [[COPY]] + %0(s32) = COPY $s0 %1(s32) = G_BITCAST %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... --- @@ -70,15 +68,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: bitcast_s32_gpr_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]] - ; CHECK: %s0 = COPY [[COPY1]] - %0(s32) = COPY %w0 + ; CHECK: $s0 = COPY [[COPY1]] + %0(s32) = COPY $w0 %1(s32) = G_BITCAST %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... 
--- @@ -92,15 +90,15 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: bitcast_s32_fpr_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: %w0 = COPY [[COPY1]] - %0(s32) = COPY %s0 + ; CHECK: $w0 = COPY [[COPY1]] + %0(s32) = COPY $s0 %1(s32) = G_BITCAST %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -114,15 +112,14 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: bitcast_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[COPY]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]] + %0(s64) = COPY $x0 %1(s64) = G_BITCAST %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -136,15 +133,14 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: bitcast_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]] - ; CHECK: %d0 = COPY [[COPY1]] - %0(s64) = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: $d0 = COPY [[COPY]] + %0(s64) = COPY $d0 %1(s64) = G_BITCAST %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -157,15 +153,15 @@ registers: - { id: 1, class: fpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: bitcast_s64_gpr_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]] - ; CHECK: %d0 = COPY [[COPY1]] - %0(s64) = COPY %x0 + ; CHECK: $d0 = COPY [[COPY1]] + %0(s64) = COPY $x0 %1(s64) = G_BITCAST %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... 
--- @@ -179,15 +175,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: bitcast_s64_fpr_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(s64) = COPY %d0 + ; CHECK: $x0 = COPY [[COPY1]] + %0(s64) = COPY $d0 %1(s64) = G_BITCAST %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -201,15 +197,14 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: bitcast_s64_v2f32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(s64) = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: $x0 = COPY [[COPY]] + %0(s64) = COPY $d0 %1(<2 x s32>) = G_BITCAST %0 - %x0 = COPY %1(<2 x s32>) + $x0 = COPY %1(<2 x s32>) ... --- @@ -223,13 +218,12 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: bitcast_s64_v8i8_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(s64) = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: $x0 = COPY [[COPY]] + %0(s64) = COPY $d0 %1(<8 x s8>) = G_BITCAST %0 - %x0 = COPY %1(<8 x s8>) + $x0 = COPY %1(<8 x s8>) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-br.mir b/test/CodeGen/AArch64/GlobalISel/select-br.mir index 0d6108fe322d..03f16f784537 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-br.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-br.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -37,12 +37,12 @@ registers: # CHECK: body: # CHECK: bb.0: -# CHECK: TBNZW %0, 0, %bb.1 +# CHECK: TBNZW %1, 0, %bb.1 # CHECK: B %bb.0 body: | bb.0: successors: %bb.0, %bb.1 - %1(s32) = COPY %w0 + %1(s32) = COPY $w0 %0(s1) = G_TRUNC %1 G_BRCOND %0(s1), %bb.1 G_BR %bb.0 @@ -61,12 +61,12 @@ registers: # CHECK: body: # CHECK: bb.0: -# CHECK: %0:gpr64 = COPY %x0 +# CHECK: %0:gpr64 = COPY $x0 # CHECK: BR %0 body: | bb.0: successors: %bb.0, %bb.1 - %0(p0) = COPY %x0 + %0(p0) = COPY $x0 G_BRINDIRECT %0(p0) bb.1: diff --git a/test/CodeGen/AArch64/GlobalISel/select-bswap.mir b/test/CodeGen/AArch64/GlobalISel/select-bswap.mir index 17394fe86d2c..244803976fd6 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-bswap.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-bswap.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -19,15 +19,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: bswap_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[REVWr:%[0-9]+]]:gpr32 = REVWr [[COPY]] - ; CHECK: %w0 = COPY [[REVWr]] - %0(s32) = 
COPY %w0 + ; CHECK: $w0 = COPY [[REVWr]] + %0(s32) = COPY $w0 %1(s32) = G_BSWAP %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- @@ -41,13 +41,13 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: bswap_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[REVXr:%[0-9]+]]:gpr64 = REVXr [[COPY]] - ; CHECK: %x0 = COPY [[REVXr]] - %0(s64) = COPY %x0 + ; CHECK: $x0 = COPY [[REVXr]] + %0(s64) = COPY $x0 %1(s64) = G_BSWAP %0 - %x0 = COPY %1 + $x0 = COPY %1 ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-cbz.mir b/test/CodeGen/AArch64/GlobalISel/select-cbz.mir index f8f0126bdc3d..4075ba279c75 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-cbz.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-cbz.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | define void @cbz_s32() { ret void } @@ -15,15 +15,15 @@ regBankSelected: true # CHECK: body: # CHECK: bb.0: -# CHECK: %0:gpr32 = COPY %w0 +# CHECK: %0:gpr32 = COPY $w0 # CHECK: CBZW %0, %bb.1 # CHECK: B %bb.0 body: | bb.0: - liveins: %w0 + liveins: $w0 successors: %bb.0, %bb.1 - %0:gpr(s32) = COPY %w0 + %0:gpr(s32) = COPY $w0 %1:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s32) = G_ICMP intpred(eq), %0, %1 %3:gpr(s1) = G_TRUNC %2(s32) @@ -41,15 +41,15 @@ regBankSelected: true # CHECK: body: # CHECK: bb.0: -# CHECK: %0:gpr64 = COPY %x0 +# CHECK: %0:gpr64 = COPY $x0 # CHECK: CBZX %0, %bb.1 # CHECK: B %bb.0 body: | bb.0: - liveins: %x0 + liveins: $x0 successors: %bb.0, %bb.1 - %0:gpr(s64) = COPY %x0 + %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 0 %2:gpr(s32) = G_ICMP intpred(eq), %0, %1 %3:gpr(s1) = G_TRUNC %2(s32) @@ -67,15 +67,15 @@ regBankSelected: true # CHECK: body: # CHECK: bb.0: -# CHECK: %0:gpr32 = COPY %w0 +# CHECK: %0:gpr32 = COPY $w0 # CHECK: 
CBNZW %0, %bb.1 # CHECK: B %bb.0 body: | bb.0: - liveins: %w0 + liveins: $w0 successors: %bb.0, %bb.1 - %0:gpr(s32) = COPY %w0 + %0:gpr(s32) = COPY $w0 %1:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s32) = G_ICMP intpred(ne), %0, %1 %3:gpr(s1) = G_TRUNC %2(s32) @@ -93,15 +93,15 @@ regBankSelected: true # CHECK: body: # CHECK: bb.0: -# CHECK: %0:gpr64 = COPY %x0 +# CHECK: %0:gpr64 = COPY $x0 # CHECK: CBNZX %0, %bb.1 # CHECK: B %bb.0 body: | bb.0: - liveins: %x0 + liveins: $x0 successors: %bb.0, %bb.1 - %0:gpr(s64) = COPY %x0 + %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 0 %2:gpr(s32) = G_ICMP intpred(ne), %0, %1 %3:gpr(s1) = G_TRUNC %2(s32) diff --git a/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir b/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir index 67ce28ba8590..345c21d18fde 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -15,19 +15,19 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:gpr32 = MOVi32imm 0 ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[RES:%[0-9]+]]:gpr32 = CASW [[CMP]], [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) - ; CHECK: %w0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = CASW [[CMP]], [[CST]], [[COPY]] :: (load store monotonic 4 on %ir.addr) + ; CHECK: $w0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s32) = 
G_CONSTANT i32 1 - %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) - %w0 = COPY %3(s32) + %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 4 on %ir.addr) + $w0 = COPY %3(s32) ... --- @@ -37,17 +37,17 @@ regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: cmpxchg_i64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:gpr64 = MOVi64imm 0 ; CHECK: [[CST:%[0-9]+]]:gpr64 = MOVi64imm 1 ; CHECK: [[RES:%[0-9]+]]:gpr64 = CASX [[CMP]], [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) - ; CHECK: %x0 = COPY [[RES]] - %0:gpr(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[RES]] + %0:gpr(p0) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 0 %2:gpr(s64) = G_CONSTANT i64 1 %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) - %x0 = COPY %3(s64) + $x0 = COPY %3(s64) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/test/CodeGen/AArch64/GlobalISel/select-constant.mir index fbe2ef1f2c8a..e046aa290c13 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-constant.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-constant.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -24,9 +24,9 @@ body: | bb.0: ; CHECK-LABEL: name: const_s32 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42 - ; CHECK: %w0 = COPY [[MOVi32imm]] + ; CHECK: $w0 = COPY [[MOVi32imm]] %0(s32) = G_CONSTANT i32 42 - %w0 = COPY %0(s32) + $w0 = COPY %0(s32) ... 
--- @@ -40,9 +40,9 @@ body: | bb.0: ; CHECK-LABEL: name: const_s64 ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 1234567890123 - ; CHECK: %x0 = COPY [[MOVi64imm]] + ; CHECK: $x0 = COPY [[MOVi64imm]] %0(s64) = G_CONSTANT i64 1234567890123 - %x0 = COPY %0(s64) + $x0 = COPY %0(s64) ... --- @@ -57,9 +57,9 @@ body: | ; CHECK-LABEL: name: fconst_s32 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1080033280 ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]] - ; CHECK: %s0 = COPY [[COPY]] + ; CHECK: $s0 = COPY [[COPY]] %0(s32) = G_FCONSTANT float 3.5 - %s0 = COPY %0(s32) + $s0 = COPY %0(s32) ... --- @@ -74,9 +74,9 @@ body: | ; CHECK-LABEL: name: fconst_s64 ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 4607182418800017408 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY [[MOVi64imm]] - ; CHECK: %d0 = COPY [[COPY]] + ; CHECK: $d0 = COPY [[COPY]] %0(s64) = G_FCONSTANT double 1.0 - %d0 = COPY %0(s64) + $d0 = COPY %0(s64) ... --- @@ -90,9 +90,9 @@ body: | bb.0: ; CHECK-LABEL: name: fconst_s32_0 ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 - ; CHECK: %s0 = COPY [[FMOVS0_]] + ; CHECK: $s0 = COPY [[FMOVS0_]] %0(s32) = G_FCONSTANT float 0.0 - %s0 = COPY %0(s32) + $s0 = COPY %0(s32) ... --- @@ -106,7 +106,7 @@ body: | bb.0: ; CHECK-LABEL: name: fconst_s64_0 ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0 - ; CHECK: %x0 = COPY [[FMOVD0_]] + ; CHECK: $x0 = COPY [[FMOVD0_]] %0(s64) = G_FCONSTANT double 0.0 - %x0 = COPY %0(s64) + $x0 = COPY %0(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir index 7396ae57f8fd..f75d5629478d 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple arm64-- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple arm64-- -run-pass=instruction-select %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -25,12 +25,12 @@ !2 = !{} !3 = !{i32 2, !"Dwarf Version", i32 4} !4 = !{i32 2, !"Debug Info Version", i32 3} - !5 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) + !5 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !6 = !DISubroutineType(types: !2) !7 = !DILocalVariable(name: "in", arg: 1, scope: !5, file: !1, line: 1, type: !8) !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !9 = !DILocation(line: 1, column: 1, scope: !5) - !10 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) + !10 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !11 = !DILocalVariable(name: "in", arg: 1, scope: !10, file: !1, line: 1, type: !8) !12 = !DILocation(line: 1, column: 1, scope: !10) ... 
@@ -41,16 +41,16 @@ legalized: true regBankSelected: true body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_dbg_value - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY]] - ; CHECK: %w0 = COPY [[ADDWrr]] - ; CHECK: DBG_VALUE debug-use [[ADDWrr]], debug-use %noreg, !7, !DIExpression(), debug-location !9 - %0:gpr(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: DBG_VALUE debug-use [[ADDWrr]], debug-use $noreg, !7, !DIExpression(), debug-location !9 + %0:gpr(s32) = COPY $w0 %1:gpr(s32) = G_ADD %0, %0 - %w0 = COPY %1(s32) - DBG_VALUE debug-use %1(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 + $w0 = COPY %1(s32) + DBG_VALUE debug-use %1(s32), debug-use $noreg, !7, !DIExpression(), debug-location !9 ... --- @@ -59,10 +59,10 @@ legalized: true regBankSelected: true body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_dbg_value_dead ; CHECK-NOT: COPY - ; CHECK: DBG_VALUE debug-use %noreg, debug-use %noreg, !7, !DIExpression(), debug-location !9 - %0:gpr(s32) = COPY %w0 - DBG_VALUE debug-use %0(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 + ; CHECK: DBG_VALUE debug-use $noreg, debug-use $noreg, !7, !DIExpression(), debug-location !9 + %0:gpr(s32) = COPY $w0 + DBG_VALUE debug-use %0(s32), debug-use $noreg, !7, !DIExpression(), debug-location !9 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-extload.mir b/test/CodeGen/AArch64/GlobalISel/select-extload.mir new file mode 100644 index 000000000000..e3b8f7fb40bc --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-extload.mir @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @aextload_s32_from_s16(i16 *%addr) { ret void } + + define void @aextload_s32_from_s16_not_combined(i16 *%addr) { ret void } +... + +--- +name: aextload_s32_from_s16 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: aextload_s32_from_s16 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: $w0 = COPY [[T0]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s32) = G_LOAD %0 :: (load 2 from %ir.addr) + $w0 = COPY %1(s32) +... + +--- +name: aextload_s32_from_s16_not_combined +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: aextload_s32_from_s16 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T1:%[0-9]+]]:gpr32all = COPY [[T0]] + ; CHECK: $w0 = COPY [[T1]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %2:gpr(s32) = G_ANYEXT %1 + $w0 = COPY %2(s32) +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-fma.mir b/test/CodeGen/AArch64/GlobalISel/select-fma.mir index 3e8743c3ce80..adac2ec253a8 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-fma.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-fma.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -20,17 +20,17 @@ registers: body: | bb.0: - liveins: %w0, %w1, %w2 + liveins: $w0, $w1, $w2 ; CHECK-LABEL: name: FMADDSrrr_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY %w2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $w2 ; CHECK: [[FMADDSrrr:%[0-9]+]]:fpr32 = FMADDSrrr [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK: %w0 = COPY [[FMADDSrrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 - %2(s32) = COPY %w2 + ; CHECK: $w0 = COPY [[FMADDSrrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 + %2(s32) = COPY $w2 %3(s32) = G_FMA %0, %1, %2 - %w0 = COPY %3 + $w0 = COPY %3 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir index a163ba1db328..c186ccc36497 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -44,15 +44,15 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: fptrunc_s16_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]] - ; CHECK: %h0 = COPY [[FCVTHSr]] - %0(s32) = COPY %s0 + ; CHECK: $h0 = COPY [[FCVTHSr]] + %0(s32) = COPY $s0 %1(s16) = G_FPTRUNC %0 - %h0 = COPY %1(s16) + $h0 = COPY %1(s16) ... --- @@ -66,15 +66,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fptrunc_s16_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[FCVTHDr:%[0-9]+]]:fpr16 = FCVTHDr [[COPY]] - ; CHECK: %h0 = COPY [[FCVTHDr]] - %0(s64) = COPY %d0 + ; CHECK: $h0 = COPY [[FCVTHDr]] + %0(s64) = COPY $d0 %1(s16) = G_FPTRUNC %0 - %h0 = COPY %1(s16) + $h0 = COPY %1(s16) ... --- @@ -88,15 +88,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fptrunc_s32_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[FCVTSDr:%[0-9]+]]:fpr32 = FCVTSDr [[COPY]] - ; CHECK: %s0 = COPY [[FCVTSDr]] - %0(s64) = COPY %d0 + ; CHECK: $s0 = COPY [[FCVTSDr]] + %0(s64) = COPY $d0 %1(s32) = G_FPTRUNC %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... 
--- @@ -110,15 +110,15 @@ registers: body: | bb.0: - liveins: %h0 + liveins: $h0 ; CHECK-LABEL: name: fpext_s32_s16_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0 + ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0 ; CHECK: [[FCVTSHr:%[0-9]+]]:fpr32 = FCVTSHr [[COPY]] - ; CHECK: %s0 = COPY [[FCVTSHr]] - %0(s16) = COPY %h0 + ; CHECK: $s0 = COPY [[FCVTSHr]] + %0(s16) = COPY $h0 %1(s32) = G_FPEXT %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... --- @@ -132,15 +132,15 @@ registers: body: | bb.0: - liveins: %h0 + liveins: $h0 ; CHECK-LABEL: name: fpext_s64_s16_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0 + ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0 ; CHECK: [[FCVTDHr:%[0-9]+]]:fpr64 = FCVTDHr [[COPY]] - ; CHECK: %d0 = COPY [[FCVTDHr]] - %0(s16) = COPY %h0 + ; CHECK: $d0 = COPY [[FCVTDHr]] + %0(s16) = COPY $h0 %1(s64) = G_FPEXT %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -154,15 +154,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fpext_s64_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[FCVTDSr:%[0-9]+]]:fpr64 = FCVTDSr [[COPY]] - ; CHECK: %d0 = COPY [[FCVTDSr]] - %0(s32) = COPY %s0 + ; CHECK: $d0 = COPY [[FCVTDSr]] + %0(s32) = COPY $s0 %1(s64) = G_FPEXT %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -176,15 +176,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: sitofp_s32_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[SCVTFUWSri:%[0-9]+]]:fpr32 = SCVTFUWSri [[COPY]] - ; CHECK: %s0 = COPY [[SCVTFUWSri]] - %0(s32) = COPY %w0 + ; CHECK: $s0 = COPY [[SCVTFUWSri]] + %0(s32) = COPY $w0 %1(s32) = G_SITOFP %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... 
--- @@ -198,15 +198,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: sitofp_s32_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[SCVTFUXSri:%[0-9]+]]:fpr32 = SCVTFUXSri [[COPY]] - ; CHECK: %s0 = COPY [[SCVTFUXSri]] - %0(s64) = COPY %x0 + ; CHECK: $s0 = COPY [[SCVTFUXSri]] + %0(s64) = COPY $x0 %1(s32) = G_SITOFP %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... --- @@ -220,15 +220,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: sitofp_s64_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[SCVTFUWDri:%[0-9]+]]:fpr64 = SCVTFUWDri [[COPY]] - ; CHECK: %d0 = COPY [[SCVTFUWDri]] - %0(s32) = COPY %w0 + ; CHECK: $d0 = COPY [[SCVTFUWDri]] + %0(s32) = COPY $w0 %1(s64) = G_SITOFP %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -242,15 +242,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: sitofp_s64_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[SCVTFUXDri:%[0-9]+]]:fpr64 = SCVTFUXDri [[COPY]] - ; CHECK: %d0 = COPY [[SCVTFUXDri]] - %0(s64) = COPY %x0 + ; CHECK: $d0 = COPY [[SCVTFUXDri]] + %0(s64) = COPY $x0 %1(s64) = G_SITOFP %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -264,15 +264,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: uitofp_s32_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[UCVTFUWSri:%[0-9]+]]:fpr32 = UCVTFUWSri [[COPY]] - ; CHECK: %s0 = COPY [[UCVTFUWSri]] - %0(s32) = COPY %w0 + ; CHECK: $s0 = COPY [[UCVTFUWSri]] + %0(s32) = COPY $w0 %1(s32) = G_UITOFP %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... 
--- @@ -286,15 +286,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: uitofp_s32_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[UCVTFUXSri:%[0-9]+]]:fpr32 = UCVTFUXSri [[COPY]] - ; CHECK: %s0 = COPY [[UCVTFUXSri]] - %0(s64) = COPY %x0 + ; CHECK: $s0 = COPY [[UCVTFUXSri]] + %0(s64) = COPY $x0 %1(s32) = G_UITOFP %0 - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... --- @@ -308,15 +308,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: uitofp_s64_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[UCVTFUWDri:%[0-9]+]]:fpr64 = UCVTFUWDri [[COPY]] - ; CHECK: %d0 = COPY [[UCVTFUWDri]] - %0(s32) = COPY %w0 + ; CHECK: $d0 = COPY [[UCVTFUWDri]] + %0(s32) = COPY $w0 %1(s64) = G_UITOFP %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -330,15 +330,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: uitofp_s64_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[UCVTFUXDri:%[0-9]+]]:fpr64 = UCVTFUXDri [[COPY]] - ; CHECK: %d0 = COPY [[UCVTFUXDri]] - %0(s64) = COPY %x0 + ; CHECK: $d0 = COPY [[UCVTFUXDri]] + %0(s64) = COPY $x0 %1(s64) = G_UITOFP %0 - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -352,15 +352,15 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: fptosi_s32_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[FCVTZSUWSr:%[0-9]+]]:gpr32 = FCVTZSUWSr [[COPY]] - ; CHECK: %w0 = COPY [[FCVTZSUWSr]] - %0(s32) = COPY %s0 + ; CHECK: $w0 = COPY [[FCVTZSUWSr]] + %0(s32) = COPY $s0 %1(s32) = G_FPTOSI %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... 
--- @@ -374,15 +374,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fptosi_s32_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[FCVTZSUWDr:%[0-9]+]]:gpr32 = FCVTZSUWDr [[COPY]] - ; CHECK: %w0 = COPY [[FCVTZSUWDr]] - %0(s64) = COPY %d0 + ; CHECK: $w0 = COPY [[FCVTZSUWDr]] + %0(s64) = COPY $d0 %1(s32) = G_FPTOSI %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -396,15 +396,15 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: fptosi_s64_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[FCVTZSUXSr:%[0-9]+]]:gpr64 = FCVTZSUXSr [[COPY]] - ; CHECK: %x0 = COPY [[FCVTZSUXSr]] - %0(s32) = COPY %s0 + ; CHECK: $x0 = COPY [[FCVTZSUXSr]] + %0(s32) = COPY $s0 %1(s64) = G_FPTOSI %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -418,15 +418,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fptosi_s64_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[FCVTZSUXDr:%[0-9]+]]:gpr64 = FCVTZSUXDr [[COPY]] - ; CHECK: %x0 = COPY [[FCVTZSUXDr]] - %0(s64) = COPY %d0 + ; CHECK: $x0 = COPY [[FCVTZSUXDr]] + %0(s64) = COPY $d0 %1(s64) = G_FPTOSI %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -440,15 +440,15 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: fptoui_s32_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[FCVTZUUWSr:%[0-9]+]]:gpr32 = FCVTZUUWSr [[COPY]] - ; CHECK: %w0 = COPY [[FCVTZUUWSr]] - %0(s32) = COPY %s0 + ; CHECK: $w0 = COPY [[FCVTZUUWSr]] + %0(s32) = COPY $s0 %1(s32) = G_FPTOUI %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... 
--- @@ -462,15 +462,15 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fptoui_s32_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[FCVTZUUWDr:%[0-9]+]]:gpr32 = FCVTZUUWDr [[COPY]] - ; CHECK: %w0 = COPY [[FCVTZUUWDr]] - %0(s64) = COPY %d0 + ; CHECK: $w0 = COPY [[FCVTZUUWDr]] + %0(s64) = COPY $d0 %1(s32) = G_FPTOUI %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -484,15 +484,15 @@ registers: body: | bb.0: - liveins: %s0 + liveins: $s0 ; CHECK-LABEL: name: fptoui_s64_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[FCVTZUUXSr:%[0-9]+]]:gpr64 = FCVTZUUXSr [[COPY]] - ; CHECK: %x0 = COPY [[FCVTZUUXSr]] - %0(s32) = COPY %s0 + ; CHECK: $x0 = COPY [[FCVTZUUXSr]] + %0(s32) = COPY $s0 %1(s64) = G_FPTOUI %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -506,13 +506,13 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: fptoui_s64_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[FCVTZUUXDr:%[0-9]+]]:gpr64 = FCVTZUUXDr [[COPY]] - ; CHECK: %x0 = COPY [[FCVTZUUXDr]] - %0(s64) = COPY %d0 + ; CHECK: $x0 = COPY [[FCVTZUUXDr]] + %0(s64) = COPY $d0 %1(s64) = G_FPTOUI %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir new file mode 100644 index 000000000000..ba4fdb5fa25b --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir @@ -0,0 +1,62 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + @foo1 = common global [1073741824 x i32] zeroinitializer, align 4 + @foo2 = common global [1073741824 x i32] zeroinitializer, align 4 + + define i32 @gv_large() { + entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + ret i32 %add + } + +... 
+--- +name: gv_large +legalized: true +regBankSelected: true +stack: + - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, + stack-id: 0, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +constants: +body: | + bb.1: + ; CHECK-LABEL: name: gv_large + ; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0 + ; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16 + ; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32 + ; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]] + ; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0 + ; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16 + ; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32 + ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] + ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr(s32) = G_CONSTANT i32 0 + %4:gpr(p0) = G_GLOBAL_VALUE @foo1 + %3:gpr(p0) = COPY %4(p0) + %7:gpr(p0) = G_GLOBAL_VALUE @foo2 + %6:gpr(p0) = COPY %7(p0) + %0:gpr(p0) = G_FRAME_INDEX 
%stack.0.retval + G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval) + %2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + %5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + %8:gpr(s32) = G_ADD %2, %5 + $w0 = COPY %8(s32) + RET_ReallyLR implicit $w0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-imm.mir b/test/CodeGen/AArch64/GlobalISel/select-imm.mir index 28fb4b396531..4010a164cde4 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-imm.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-imm.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -20,13 +20,13 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: imm_s32_gpr ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -1234 - ; CHECK: %w0 = COPY [[MOVi32imm]] + ; CHECK: $w0 = COPY [[MOVi32imm]] %0(s32) = G_CONSTANT i32 -1234 - %w0 = COPY %0(s32) + $w0 = COPY %0(s32) ... --- @@ -40,11 +40,11 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: imm_s64_gpr ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 1234 - ; CHECK: %x0 = COPY [[MOVi64imm]] + ; CHECK: $x0 = COPY [[MOVi64imm]] %0(s64) = G_CONSTANT i64 1234 - %x0 = COPY %0(s64) + $x0 = COPY %0(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir b/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir index 7b65fe3bf7dc..cdbd34f94548 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir @@ -1,10 +1,11 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @implicit_def() { ret void } + define void @implicit_def_copy() { ret void } ... --- @@ -20,8 +21,26 @@ body: | ; CHECK-LABEL: name: implicit_def ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[DEF]], [[DEF]] - ; CHECK: %w0 = COPY [[ADDWrr]] + ; CHECK: $w0 = COPY [[ADDWrr]] %0(s32) = G_IMPLICIT_DEF %1(s32) = G_ADD %0, %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) +... +--- +name: implicit_def_copy +legalized: true +regBankSelected: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + +body: | + bb.0: + ; CHECK-LABEL: name: implicit_def_copy + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY [[DEF]] + ; CHECK: $w0 = COPY [[COPY]] + %0(s32) = G_IMPLICIT_DEF + %1(s32) = COPY %0(s32) + $w0 = COPY %1(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir index 33b483511065..51c2c163b2ca 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir @@ -1,54 +1,114 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- -# CHECK-LABEL: name: insert_gprs -name: insert_gprs +name: insert_gprx legalized: true regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 - %0:gpr(s32) = COPY %w0 + ; CHECK-LABEL: name: insert_gprx + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 + ; CHECK: [[BFMXri:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG]], 0, 31 + ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 + ; CHECK: [[BFMXri1:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG1]], 51, 31 + ; CHECK: $x0 = COPY [[BFMXri]] + ; CHECK: $x1 = COPY [[BFMXri1]] + %0:gpr(s32) = COPY $w0 %1:gpr(s64) = G_IMPLICIT_DEF - ; CHECK: body: - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32 - ; CHECK: %2:gpr64 = BFMXri %1, [[TMP]], 0, 31 %2:gpr(s64) = G_INSERT %1, %0, 0 - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32 - ; CHECK: %3:gpr64 = BFMXri %1, [[TMP]], 51, 31 %3:gpr(s64) = G_INSERT %1, %0, 13 - %x0 = COPY %2 - %x1 = COPY %3 + $x0 = COPY %2 + $x1 = COPY %3 ... 
+--- +name: insert_gprw +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: insert_gprw + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY]], 0, 15 + ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY]], 16, 15 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[BFMWri1]] + ; CHECK: $w0 = COPY [[COPY1]] + %1:gpr(s32) = COPY $w0 + %2:gpr(s32) = COPY $w1 + %3:gpr(s16) = G_TRUNC %1(s32) + %4:gpr(s16) = G_TRUNC %1(s32) + %5:gpr(s32) = G_IMPLICIT_DEF + %6:gpr(s32) = G_INSERT %5, %3(s16), 0 + %7:gpr(s32) = G_INSERT %6, %4(s16), 16 + %0:gpr(s32) = COPY %7(s32) + $w0 = COPY %0 +... --- -# CHECK-LABEL: name: extract_gprs name: extract_gprs legalized: true regBankSelected: true body: | bb.0: - liveins: %x0 + liveins: $x0 - %0:gpr(s64) = COPY %x0 + ; CHECK-LABEL: name: extract_gprs + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 0, 31 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[UBFMXri]].sub_32 + ; CHECK: [[UBFMXri1:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 13, 44 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[UBFMXri1]].sub_32 + ; CHECK: $w0 = COPY [[COPY1]] + ; CHECK: $w1 = COPY [[COPY2]] + %0:gpr(s64) = COPY $x0 - ; CHECK: body: - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 0, 31 - ; CHECK: %1:gpr32 = COPY [[TMP]].sub_32 %1:gpr(s32) = G_EXTRACT %0, 0 - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 13, 44 - ; CHECK: %2:gpr32 = COPY [[TMP]].sub_32 %2:gpr(s32) = G_EXTRACT %0, 13 - %w0 = COPY %1 - %w1 = COPY %2 + $w0 = COPY %1 + $w1 = COPY %2 +... 
+ +--- +name: extract_gprw +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $w0 + + ; CHECK-LABEL: name: extract_gprw + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15 + ; CHECK: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 15, 30 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[UBFMWri]] + ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub + ; CHECK: $h0 = COPY [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]] + ; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]].hsub + ; CHECK: $h1 = COPY [[COPY4]] + %0:gpr(s32) = COPY $w0 + + %1:gpr(s16) = G_EXTRACT %0, 0 + + %2:gpr(s16) = G_EXTRACT %0, 15 + + $h0 = COPY %1 + $h1 = COPY %2 ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir index bd75c4e661ea..18764b334996 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -29,16 +29,15 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: anyext_s64_from_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY $w0 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[SUBREG_TO_REG]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(s32) = COPY %w0 + ; CHECK: $x0 = COPY [[SUBREG_TO_REG]] + %0(s32) = COPY $w0 %1(s64) = G_ANYEXT %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... 
--- @@ -52,17 +51,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: anyext_s32_from_s8 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] - ; CHECK: %w0 = COPY [[COPY2]] - %2:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY]] + ; CHECK: $w0 = COPY [[COPY2]] + %2:gpr(s32) = COPY $w0 %0(s8) = G_TRUNC %2 %1(s32) = G_ANYEXT %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -76,16 +74,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: zext_s64_from_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31 - ; CHECK: %x0 = COPY [[UBFMXri]] - %0(s32) = COPY %w0 + ; CHECK: $x0 = COPY [[UBFMXri]] + %0(s32) = COPY $w0 %1(s64) = G_ZEXT %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -99,17 +97,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: zext_s32_from_s16 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 15 - ; CHECK: %w0 = COPY [[UBFMWri]] - %2:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15 + ; CHECK: $w0 = COPY [[UBFMWri]] + %2:gpr(s32) = COPY $w0 %0(s16) = G_TRUNC %2 %1(s32) = G_ZEXT %0 - %w0 = COPY %1 + $w0 = COPY %1 ... 
--- @@ -123,17 +120,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: zext_s32_from_s8 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 15 - ; CHECK: %w0 = COPY [[UBFMWri]] - %2:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15 + ; CHECK: $w0 = COPY [[UBFMWri]] + %2:gpr(s32) = COPY $w0 %0(s16) = G_TRUNC %2 %1(s32) = G_ZEXT %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -147,19 +143,18 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: zext_s16_from_s8 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 7 ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[UBFMWri]] - ; CHECK: %w0 = COPY [[COPY2]] - %2:gpr(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[COPY2]] + %2:gpr(s32) = COPY $w0 %0(s8) = G_TRUNC %2 %1(s16) = G_ZEXT %0 %3:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %3(s32) + $w0 = COPY %3(s32) ... --- @@ -173,16 +168,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: sext_s64_from_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 ; CHECK: [[SBFMXri:%[0-9]+]]:gpr64 = SBFMXri [[SUBREG_TO_REG]], 0, 31 - ; CHECK: %x0 = COPY [[SBFMXri]] - %0(s32) = COPY %w0 + ; CHECK: $x0 = COPY [[SBFMXri]] + %0(s32) = COPY $w0 %1(s64) = G_SEXT %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... 
--- @@ -196,17 +191,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: sext_s32_from_s16 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY1]], 0, 15 - ; CHECK: %w0 = COPY [[SBFMWri]] - %2:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY]], 0, 15 + ; CHECK: $w0 = COPY [[SBFMWri]] + %2:gpr(s32) = COPY $w0 %0(s16) = G_TRUNC %2 %1(s32) = G_SEXT %0 - %w0 = COPY %1 + $w0 = COPY %1 ... --- @@ -220,17 +214,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: sext_s32_from_s8 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY1]], 0, 7 - ; CHECK: %w0 = COPY [[SBFMWri]] - %2:gpr(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY]], 0, 7 + ; CHECK: $w0 = COPY [[SBFMWri]] + %2:gpr(s32) = COPY $w0 %0(s8) = G_TRUNC %2 %1(s32) = G_SEXT %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -244,17 +237,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: sext_s16_from_s8 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY1]], 0, 7 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY]], 0, 7 ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SBFMWri]] - ; CHECK: %w0 = COPY [[COPY2]] - %2:gpr(s32) = COPY %w0 + ; CHECK: $w0 = COPY [[COPY2]] + %2:gpr(s32) = COPY $w0 %0(s8) = G_TRUNC %2 %1(s16) = G_SEXT %0 %3:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %3(s32) + $w0 = COPY %3(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir index 405634a00aa7..31a44089f0c3 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -22,14 +22,13 @@ registers: - { id: 1, class: gpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: inttoptr_p0_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[COPY]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]] + %0(s64) = COPY $x0 %1(p0) = G_INTTOPTR %0 - %x0 = COPY %1(p0) + $x0 = COPY %1(p0) ... --- @@ -42,14 +41,13 @@ registers: - { id: 1, class: gpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: ptrtoint_s64_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] - ; CHECK: %x0 = COPY [[COPY1]] - %0(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]] + %0(p0) = COPY $x0 %1(s64) = G_PTRTOINT %0 - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... 
--- @@ -62,14 +60,14 @@ registers: - { id: 1, class: gpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: ptrtoint_s32_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 - ; CHECK: %w0 = COPY [[COPY1]] - %0(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY1]] + %0(p0) = COPY $x0 %1(s32) = G_PTRTOINT %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -82,16 +80,16 @@ registers: - { id: 1, class: gpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: ptrtoint_s16_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] - ; CHECK: %w0 = COPY [[COPY2]] - %0(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY2]] + %0(p0) = COPY $x0 %1(s16) = G_PTRTOINT %0 %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -104,16 +102,16 @@ registers: - { id: 1, class: gpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: ptrtoint_s8_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] - ; CHECK: %w0 = COPY [[COPY2]] - %0(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY2]] + %0(p0) = COPY $x0 %1(s8) = G_PTRTOINT %0 %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
--- @@ -126,14 +124,14 @@ registers: - { id: 1, class: gpr } body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: ptrtoint_s1_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] - ; CHECK: %w0 = COPY [[COPY2]] - %0(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY2]] + %0(p0) = COPY $x0 %1(s1) = G_PTRTOINT %0 %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-hint.mir b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-hint.mir index 61eff7c02bfe..231785355c83 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-hint.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-hint.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -22,7 +22,7 @@ registers: # CHECK: HINT 1 body: | bb.0: - liveins: %w0 + liveins: $w0 %0(s32) = G_CONSTANT i32 1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.hint), %0 diff --git a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir index 0387d7ab8ba4..6c0cf42553ff 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- 
| target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -21,15 +21,15 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: sdiv_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[SDIVWr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[SDIVWr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.sdiv.i32), %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-crypto-aesmc.mir b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-crypto-aesmc.mir index fe457b8bffca..1e33c5b59b97 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-crypto-aesmc.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-crypto-aesmc.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+fuse-aes -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+fuse-aes -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- # Check that we select the aarch64_crypto_aesmc and aarch64_crypto_aese @@ -10,17 +10,17 @@ regBankSelected: true body: | bb.0: - liveins: %q0, %q1 + liveins: $q0, $q1 ; CHECK-LABEL: name: aesmc_aese - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY %q0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY %q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[T0:%[0-9]+]]:fpr128 = AESErr [[COPY]], [[COPY1]] ; CHECK: [[T1:%[0-9]+]]:fpr128 = AESMCrrTied [[T0]] - ; CHECK: %q0 = COPY [[T1]] - %0:fpr(<16 x s8>) = COPY %q0 - %1:fpr(<16 x s8>) = COPY %q1 + ; CHECK: $q0 = COPY [[T1]] + %0:fpr(<16 
x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 %2:fpr(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.aarch64.crypto.aese), %0, %1 %3:fpr(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.aarch64.crypto.aesmc), %2 - %q0 = COPY %3(<16 x s8>) + $q0 = COPY %3(<16 x s8>) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir index 5c030f931dde..713a5b202b49 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -1,12 +1,14 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @load_s64_gpr(i64* %addr) { ret void } define void @load_s32_gpr(i32* %addr) { ret void } + define void @load_s16_gpr_anyext(i16* %addr) { ret void } define void @load_s16_gpr(i16* %addr) { ret void } + define void @load_s8_gpr_anyext(i8* %addr) { ret void } define void @load_s8_gpr(i8* %addr) { ret void } define void @load_fi_s64_gpr() { @@ -30,10 +32,6 @@ define void @load_gep_32_s8_fpr(i8* %addr) { ret void } define void @load_v2s32(i64 *%addr) { ret void } - - define void @sextload_s32_from_s16(i16 *%addr) { ret void } - define void @zextload_s32_from_s16(i16 *%addr) { ret void } - define void @aextload_s32_from_s16(i16 *%addr) { ret void } ... 
--- @@ -47,15 +45,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load 8 from %ir.addr) - ; CHECK: %x0 = COPY [[LDRXui]] - %0(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[LDRXui]] + %0(p0) = COPY $x0 %1(s64) = G_LOAD %0 :: (load 8 from %ir.addr) - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -69,15 +67,33 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.addr) - ; CHECK: %w0 = COPY [[LDRWui]] - %0(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[LDRWui]] + %0(p0) = COPY $x0 %1(s32) = G_LOAD %0 :: (load 4 from %ir.addr) - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) +... + +--- +name: load_s16_gpr_anyext +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: load_s16_gpr_anyext + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: $w0 = COPY [[LDRHHui]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s32) = G_LOAD %0 :: (load 2 from %ir.addr) + $w0 = COPY %1(s32) ... --- @@ -91,16 +107,35 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s16_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) - ; CHECK: %w0 = COPY [[LDRHHui]] - %0(p0) = COPY %x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] + ; CHECK: $w0 = COPY [[T0]] + %0(p0) = COPY $x0 %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr) %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) +... 
+ +--- +name: load_s8_gpr_anyext +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: load_s8_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr) + ; CHECK: $w0 = COPY [[LDRBBui]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s32) = G_LOAD %0 :: (load 1 from %ir.addr) + $w0 = COPY %1(s32) ... --- @@ -114,16 +149,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s8_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr) - ; CHECK: %w0 = COPY [[LDRBBui]] - %0(p0) = COPY %x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] + ; CHECK: $w0 = COPY [[T0]] + %0(p0) = COPY $x0 %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr) %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -140,14 +176,14 @@ stack: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_fi_s64_gpr ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load 8) - ; CHECK: %x0 = COPY [[LDRXui]] + ; CHECK: $x0 = COPY [[LDRXui]] %0(p0) = G_FRAME_INDEX %stack.0.ptr0 %1(s64) = G_LOAD %0 :: (load 8) - %x0 = COPY %1(s64) + $x0 = COPY %1(s64) ... --- @@ -163,17 +199,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_128_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load 8 from %ir.addr) - ; CHECK: %x0 = COPY [[LDRXui]] - %0(p0) = COPY %x0 + ; CHECK: $x0 = COPY [[LDRXui]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 128 %2(p0) = G_GEP %0, %1 %3(s64) = G_LOAD %2 :: (load 8 from %ir.addr) - %x0 = COPY %3 + $x0 = COPY %3 ... 
--- @@ -189,17 +225,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_512_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load 4 from %ir.addr) - ; CHECK: %w0 = COPY [[LDRWui]] - %0(p0) = COPY %x0 + ; CHECK: $w0 = COPY [[LDRWui]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 512 %2(p0) = G_GEP %0, %1 %3(s32) = G_LOAD %2 :: (load 4 from %ir.addr) - %w0 = COPY %3 + $w0 = COPY %3 ... --- @@ -215,18 +251,19 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_64_s16_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load 2 from %ir.addr) - ; CHECK: %w0 = COPY [[LDRHHui]] - %0(p0) = COPY %x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] + ; CHECK: $w0 = COPY [[T0]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 64 %2(p0) = G_GEP %0, %1 %3(s16) = G_LOAD %2 :: (load 2 from %ir.addr) %4:gpr(s32) = G_ANYEXT %3 - %w0 = COPY %4 + $w0 = COPY %4 ... --- @@ -242,18 +279,19 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_1_s8_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load 1 from %ir.addr) - ; CHECK: %w0 = COPY [[LDRBBui]] - %0(p0) = COPY %x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] + ; CHECK: $w0 = COPY [[T0]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 1 %2(p0) = G_GEP %0, %1 %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) %4:gpr(s32) = G_ANYEXT %3 - %w0 = COPY %4 + $w0 = COPY %4 ... 
--- @@ -267,15 +305,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.addr) - ; CHECK: %d0 = COPY [[LDRDui]] - %0(p0) = COPY %x0 + ; CHECK: $d0 = COPY [[LDRDui]] + %0(p0) = COPY $x0 %1(s64) = G_LOAD %0 :: (load 8 from %ir.addr) - %d0 = COPY %1(s64) + $d0 = COPY %1(s64) ... --- @@ -289,15 +327,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load 4 from %ir.addr) - ; CHECK: %s0 = COPY [[LDRSui]] - %0(p0) = COPY %x0 + ; CHECK: $s0 = COPY [[LDRSui]] + %0(p0) = COPY $x0 %1(s32) = G_LOAD %0 :: (load 4 from %ir.addr) - %s0 = COPY %1(s32) + $s0 = COPY %1(s32) ... --- @@ -311,15 +349,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s16_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load 2 from %ir.addr) - ; CHECK: %h0 = COPY [[LDRHui]] - %0(p0) = COPY %x0 + ; CHECK: $h0 = COPY [[LDRHui]] + %0(p0) = COPY $x0 %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr) - %h0 = COPY %1(s16) + $h0 = COPY %1(s16) ... --- @@ -333,15 +371,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_s8_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1 from %ir.addr) - ; CHECK: %b0 = COPY [[LDRBui]] - %0(p0) = COPY %x0 + ; CHECK: $b0 = COPY [[LDRBui]] + %0(p0) = COPY $x0 %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr) - %b0 = COPY %1(s8) + $b0 = COPY %1(s8) ... 
--- @@ -357,17 +395,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_8_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load 8 from %ir.addr) - ; CHECK: %d0 = COPY [[LDRDui]] - %0(p0) = COPY %x0 + ; CHECK: $d0 = COPY [[LDRDui]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 8 %2(p0) = G_GEP %0, %1 %3(s64) = G_LOAD %2 :: (load 8 from %ir.addr) - %d0 = COPY %3 + $d0 = COPY %3 ... --- @@ -383,17 +421,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_16_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load 4 from %ir.addr) - ; CHECK: %s0 = COPY [[LDRSui]] - %0(p0) = COPY %x0 + ; CHECK: $s0 = COPY [[LDRSui]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 16 %2(p0) = G_GEP %0, %1 %3(s32) = G_LOAD %2 :: (load 4 from %ir.addr) - %s0 = COPY %3 + $s0 = COPY %3 ... --- @@ -409,17 +447,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_64_s16_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load 2 from %ir.addr) - ; CHECK: %h0 = COPY [[LDRHui]] - %0(p0) = COPY %x0 + ; CHECK: $h0 = COPY [[LDRHui]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 64 %2(p0) = G_GEP %0, %1 %3(s16) = G_LOAD %2 :: (load 2 from %ir.addr) - %h0 = COPY %3 + $h0 = COPY %3 ... 
--- @@ -435,17 +473,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_gep_32_s8_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load 1 from %ir.addr) - ; CHECK: %b0 = COPY [[LDRBui]] - %0(p0) = COPY %x0 + ; CHECK: $b0 = COPY [[LDRBui]] + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 32 %2(p0) = G_GEP %0, %1 %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) - %b0 = COPY %3 + $b0 = COPY %3 ... --- name: load_v2s32 @@ -458,69 +496,13 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: load_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.addr) - ; CHECK: %d0 = COPY [[LDRDui]] - %0(p0) = COPY %x0 + ; CHECK: $d0 = COPY [[LDRDui]] + %0(p0) = COPY $x0 %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr) - %d0 = COPY %1(<2 x s32>) -... ---- -name: sextload_s32_from_s16 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: %w0 - - ; CHECK-LABEL: name: sextload_s32_from_s16 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRSHWui [[COPY]], 0 :: (load 2 from %ir.addr) - ; CHECK: %w0 = COPY [[T0]] - %0:gpr(p0) = COPY %x0 - %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) - %2:gpr(s32) = G_SEXT %1 - %w0 = COPY %2(s32) -... - ---- -name: zextload_s32_from_s16 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: %w0 - - ; CHECK-LABEL: name: zextload_s32_from_s16 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) - ; CHECK: %w0 = COPY [[T0]] - %0:gpr(p0) = COPY %x0 - %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) - %2:gpr(s32) = G_ZEXT %1 - %w0 = COPY %2(s32) -... 
- ---- -name: aextload_s32_from_s16 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: %w0 - - ; CHECK-LABEL: name: aextload_s32_from_s16 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) - ; CHECK: %w0 = COPY [[T0]] - %0:gpr(p0) = COPY %x0 - %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) - %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $d0 = COPY %1(<2 x s32>) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-mul.mir b/test/CodeGen/AArch64/GlobalISel/select-mul.mir new file mode 100644 index 000000000000..548293f48c55 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-mul.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- +name: mul_i64_sext_imm32 +legalized: true +regBankSelected: true + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + +body: | + bb.0: + liveins: $w0 + + ; Make sure InstructionSelector is able to match a pattern + ; with an SDNodeXForm, trunc_imm. + ; def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), + ; (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; + ; CHECK-LABEL: name: mul_i64_sext_imm32 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3 + ; CHECK: [[SMADDLrrr:%[0-9]+]]:gpr64 = SMADDLrrr [[COPY]], [[MOVi32imm]], $xzr + ; CHECK: $x0 = COPY [[SMADDLrrr]] + %0:gpr(s32) = COPY $w0 + %1:gpr(s64) = G_SEXT %0(s32) + %2:gpr(s64) = G_CONSTANT i64 3 + %3:gpr(s64) = G_MUL %1, %2 + $x0 = COPY %3(s64) +... 
+ + diff --git a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir index 0771504032c5..c44714c17deb 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -23,21 +23,21 @@ registers: body: | bb.0: - liveins: %x0, %w1, %w2 + liveins: $x0, $w1, $w2 ; CHECK-LABEL: name: SMADDLrrr_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %w2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 ; CHECK: [[SMADDLrrr:%[0-9]+]]:gpr64 = SMADDLrrr [[COPY1]], [[COPY2]], [[COPY]] - ; CHECK: %x0 = COPY [[SMADDLrrr]] - %0(s64) = COPY %x0 - %1(s32) = COPY %w1 - %2(s32) = COPY %w2 + ; CHECK: $x0 = COPY [[SMADDLrrr]] + %0(s64) = COPY $x0 + %1(s32) = COPY $w1 + %2(s32) = COPY $w2 %3(s64) = G_SEXT %1 %4(s64) = G_SEXT %2 %5(s64) = G_MUL %3, %4 %6(s64) = G_ADD %0, %5 - %x0 = COPY %6 + $x0 = COPY %6 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir b/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir index def06daae0b4..690ebc401a98 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -mattr=+neon,+fullfp16 -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -mattr=+neon,+fullfp16 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -20,14 +20,14 @@ registers: body: | bb.0: - liveins: %d0 + liveins: $d0 ; CHECK-LABEL: name: vcvtfxu2fp_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[UCVTFd:%[0-9]+]]:fpr64 = UCVTFd [[COPY]], 12 - ; CHECK: %d1 = COPY [[UCVTFd]] - %0(s64) = COPY %d0 + ; CHECK: $d1 = COPY [[UCVTFd]] + %0(s64) = COPY $d0 %1(s32) = G_CONSTANT i32 12 %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, %1 - %d1 = COPY %2(s64) + $d1 = COPY %2(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-phi.mir b/test/CodeGen/AArch64/GlobalISel/select-phi.mir index 3454ffadcce0..832a57e70391 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-phi.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-phi.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s +# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s --- | ; ModuleID = '/tmp/test.ll' source_filename = "/tmp/test.ll" @@ -51,11 +51,11 @@ liveins: body: | bb.1.entry: successors: %bb.2.case1(0x40000000), %bb.3.case2(0x40000000) - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: test_phi ; CHECK: [[RES:%.*]]:gpr32 = PHI - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 0 %3(s32) = G_CONSTANT i32 1 %5(s32) = G_CONSTANT i32 2 @@ -77,8 +77,8 @@ body: | bb.4.return: %7(s32) = G_PHI %4(s32), %bb.2.case1, %6(s32), %bb.3.case2 - %w0 = COPY %7(s32) - RET_ReallyLR implicit %w0 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 ... @@ -101,12 +101,12 @@ liveins: body: | bb.0: successors: %bb.1, %bb.2 - liveins: %w2, %x0, %x1 + liveins: $w2, $x0, $x1 ; CHECK-LABEL: name: test_phi_ptr - %0(p0) = COPY %x0 - %1(p0) = COPY %x1 - %6:gpr(s32) = COPY %w2 + %0(p0) = COPY $x0 + %1(p0) = COPY $x1 + %6:gpr(s32) = COPY $w2 %2(s1) = G_TRUNC %6 G_BRCOND %2(s1), %bb.1 G_BR %bb.2 @@ -118,7 +118,7 @@ body: | bb.2: ; CHECK: %{{[0-9]+}}:gpr64 = PHI %{{[0-9]+}}, %bb.0, %{{[0-9]+}}, %bb.1 %3(p0) = G_PHI %0(p0), %bb.0, %1(p0), %bb.1 - %x0 = COPY %3(p0) - RET_ReallyLR implicit %x0 + $x0 = COPY %3(p0) + RET_ReallyLR implicit $x0 ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir b/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir index 5e0ead2dbdb3..daee1ca7b556 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | define i32 @main() { @@ -50,17 +50,17 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.1.entry: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: main - ; CHECK: liveins: %w0 + ; CHECK: liveins: $w0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[EONWrr:%[0-9]+]]:gpr32 = EONWrr [[COPY]], [[MOVi32imm]] - ; CHECK: %w0 = COPY [[EONWrr]] + ; CHECK: $w0 = COPY [[EONWrr]] %0(s32) = G_CONSTANT i32 -1 %3(s32) = G_CONSTANT i32 1 - %1(s32) = COPY %w0 + %1(s32) = COPY $w0 %2(s32) = G_XOR %1, %0 %4(s32) = G_XOR %2, %3 - %w0 = COPY %4(s32) + $w0 = COPY %4(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-property.mir b/test/CodeGen/AArch64/GlobalISel/select-property.mir index 86961ac597e1..17a0738a6f2b 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-property.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-property.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" diff --git a/test/CodeGen/AArch64/GlobalISel/select-sextload.mir b/test/CodeGen/AArch64/GlobalISel/select-sextload.mir new file mode 100644 index 000000000000..0e2c3077bfc9 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-sextload.mir @@ -0,0 +1,47 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @sextload_s32_from_s16(i16 *%addr) { ret void } + define void @sextload_s32_from_s16_not_combined(i16 *%addr) { ret void } +... + +--- +name: sextload_s32_from_s16 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: sextload_s32_from_s16 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRSHWui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: $w0 = COPY [[T0]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s32) = G_SEXTLOAD %0 :: (load 2 from %ir.addr) + $w0 = COPY %1(s32) +... 
+ +--- +name: sextload_s32_from_s16_not_combined +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: sextload_s32_from_s16_not_combined + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T1:%[0-9]+]]:gpr32 = SBFMWri [[T0]], 0, 15 + ; CHECK: $w0 = COPY [[T1]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %2:gpr(s32) = G_SEXT %1 + $w0 = COPY %2(s32) +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-store.mir b/test/CodeGen/AArch64/GlobalISel/select-store.mir index 11710031e21c..c4e93af1c857 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -42,14 +42,14 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: store_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) - %0(p0) = COPY %x0 - %1(s64) = COPY %x1 + %0(p0) = COPY $x0 + %1(s64) = COPY $x1 G_STORE %1, %0 :: (store 8 into %ir.addr) ... 
@@ -65,14 +65,14 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: store_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) - %0(p0) = COPY %x0 - %1(s32) = COPY %w1 + %0(p0) = COPY $x0 + %1(s32) = COPY $w1 G_STORE %1, %0 :: (store 4 into %ir.addr) ... @@ -88,15 +88,14 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: store_s16_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store 2 into %ir.addr) - %0(p0) = COPY %x0 - %2:gpr(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2 into %ir.addr) + %0(p0) = COPY $x0 + %2:gpr(s32) = COPY $w1 %1(s16) = G_TRUNC %2 G_STORE %1, %0 :: (store 2 into %ir.addr) @@ -113,15 +112,14 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: store_s8_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: STRBBui [[COPY2]], [[COPY]], 0 :: (store 1 into %ir.addr) - %0(p0) = COPY %x0 - %2:gpr(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: STRBBui [[COPY1]], [[COPY]], 0 :: (store 1 into %ir.addr) + %0(p0) = COPY $x0 + %2:gpr(s32) = COPY $w1 %1(s8) = G_TRUNC %2 G_STORE %1, %0 :: (store 1 into %ir.addr) @@ -138,12 +136,12 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: store_zero_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: STRXui %xzr, 
[[COPY]], 0 :: (store 8 into %ir.addr) - %0(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.addr) + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 0 G_STORE %1, %0 :: (store 8 into %ir.addr) @@ -160,12 +158,12 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: store_zero_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: STRWui %wzr, [[COPY]], 0 :: (store 4 into %ir.addr) - %0(p0) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: STRWui $wzr, [[COPY]], 0 :: (store 4 into %ir.addr) + %0(p0) = COPY $x0 %1(s32) = G_CONSTANT i32 0 G_STORE %1, %0 :: (store 4 into %ir.addr) @@ -185,12 +183,12 @@ stack: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: store_fi_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: STRXui [[COPY]], %stack.0.ptr0, 0 :: (store 8) - %0(p0) = COPY %x0 + %0(p0) = COPY $x0 %1(p0) = G_FRAME_INDEX %stack.0.ptr0 G_STORE %0, %1 :: (store 8) ... 
@@ -208,14 +206,14 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: store_gep_128_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: STRXui [[COPY1]], [[COPY]], 16 :: (store 8 into %ir.addr) - %0(p0) = COPY %x0 - %1(s64) = COPY %x1 + %0(p0) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_CONSTANT i64 128 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 8 into %ir.addr) @@ -234,14 +232,14 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: store_gep_512_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: STRWui [[COPY1]], [[COPY]], 128 :: (store 4 into %ir.addr) - %0(p0) = COPY %x0 - %1(s32) = COPY %w1 + %0(p0) = COPY $x0 + %1(s32) = COPY $w1 %2(s64) = G_CONSTANT i64 512 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 4 into %ir.addr) @@ -260,15 +258,14 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: store_gep_64_s16_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: STRHHui [[COPY2]], [[COPY]], 32 :: (store 2 into %ir.addr) - %0(p0) = COPY %x0 - %4:gpr(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: STRHHui [[COPY1]], [[COPY]], 32 :: (store 2 into %ir.addr) + %0(p0) = COPY $x0 + %4:gpr(s32) = COPY $w1 %1(s16) = G_TRUNC %4 %2(s64) = G_CONSTANT i64 64 %3(p0) = G_GEP %0, %2 @@ -288,15 +285,14 @@ registers: body: | bb.0: - liveins: %x0, %w1 + liveins: $x0, $w1 ; CHECK-LABEL: name: store_gep_1_s8_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = 
COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] - ; CHECK: STRBBui [[COPY2]], [[COPY]], 1 :: (store 1 into %ir.addr) - %0(p0) = COPY %x0 - %4:gpr(s32) = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: STRBBui [[COPY1]], [[COPY]], 1 :: (store 1 into %ir.addr) + %0(p0) = COPY $x0 + %4:gpr(s32) = COPY $w1 %1(s8) = G_TRUNC %4 %2(s64) = G_CONSTANT i64 1 %3(p0) = G_GEP %0, %2 @@ -314,14 +310,14 @@ registers: body: | bb.0: - liveins: %x0, %d1 + liveins: $x0, $d1 ; CHECK-LABEL: name: store_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) - %0(p0) = COPY %x0 - %1(s64) = COPY %d1 + %0(p0) = COPY $x0 + %1(s64) = COPY $d1 G_STORE %1, %0 :: (store 8 into %ir.addr) ... @@ -337,14 +333,14 @@ registers: body: | bb.0: - liveins: %x0, %s1 + liveins: $x0, $s1 ; CHECK-LABEL: name: store_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) - %0(p0) = COPY %x0 - %1(s32) = COPY %s1 + %0(p0) = COPY $x0 + %1(s32) = COPY $s1 G_STORE %1, %0 :: (store 4 into %ir.addr) ... 
@@ -362,14 +358,14 @@ registers: body: | bb.0: - liveins: %x0, %d1 + liveins: $x0, $d1 ; CHECK-LABEL: name: store_gep_8_s64_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: STRDui [[COPY1]], [[COPY]], 1 :: (store 8 into %ir.addr) - %0(p0) = COPY %x0 - %1(s64) = COPY %d1 + %0(p0) = COPY $x0 + %1(s64) = COPY $d1 %2(s64) = G_CONSTANT i64 8 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 8 into %ir.addr) @@ -388,14 +384,14 @@ registers: body: | bb.0: - liveins: %x0, %s1 + liveins: $x0, $s1 ; CHECK-LABEL: name: store_gep_8_s32_fpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: STRSui [[COPY1]], [[COPY]], 2 :: (store 4 into %ir.addr) - %0(p0) = COPY %x0 - %1(s32) = COPY %s1 + %0(p0) = COPY $x0 + %1(s32) = COPY $s1 %2(s64) = G_CONSTANT i64 8 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 4 into %ir.addr) @@ -411,14 +407,14 @@ registers: body: | bb.0: - liveins: %x0, %d1 + liveins: $x0, $d1 ; CHECK-LABEL: name: store_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) - %0(p0) = COPY %x0 - %1(<2 x s32>) = COPY %d1 + %0(p0) = COPY $x0 + %1(<2 x s32>) = COPY $d1 G_STORE %1, %0 :: (store 8 into %ir.addr) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-trap.mir b/test/CodeGen/AArch64/GlobalISel/select-trap.mir new file mode 100644 index 000000000000..2af588130e9f --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-trap.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + + ; Function Attrs: noreturn nounwind + declare void @llvm.trap() #0 + + define void @foo() { + call void @llvm.trap() + ret void + } + + attributes #0 = { noreturn nounwind } + +... +--- +name: foo +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1 (%ir-block.0): + ; CHECK-LABEL: name: foo + ; CHECK: BRK 1 + ; CHECK: RET_ReallyLR + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) + RET_ReallyLR + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir index 421a676f7a43..1bc7795371a0 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -20,15 +20,15 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: trunc_s32_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32 - ; CHECK: %w0 = COPY [[COPY1]] - %0(s64) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY1]] + %0(s64) = 
COPY $x0 %1(s32) = G_TRUNC %0 - %w0 = COPY %1(s32) + $w0 = COPY %1(s32) ... --- @@ -42,17 +42,17 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: trunc_s8_s64 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] - ; CHECK: %w0 = COPY [[COPY2]] - %0(s64) = COPY %x0 + ; CHECK: $w0 = COPY [[COPY2]] + %0(s64) = COPY $x0 %1(s8) = G_TRUNC %0 %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -66,15 +66,14 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: trunc_s1_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] - ; CHECK: %w0 = COPY [[COPY2]] - %0(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY]] + ; CHECK: $w0 = COPY [[COPY2]] + %0(s32) = COPY $w0 %1(s1) = G_TRUNC %0 %2:gpr(s32) = G_ANYEXT %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir new file mode 100644 index 000000000000..407f4bff300c --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir @@ -0,0 +1,4546 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-ios -run-pass instruction-select %s \ +# RUN: -disable-gisel-legality-check -verify-machineinstrs -simplify-mir \ +# RUN: -o - | FileCheck %s +--- +name: test_rule14_id188_at_idx1067 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule14_id188_at_idx1067 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(s128) = G_LOAD %1(p0) :: (load 16) + $noreg = PATCHABLE_RET %0(s128) + +... +--- +name: test_rule21_id2237_at_idx1449 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%0' } + - { reg: '$d1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule21_id2237_at_idx1449 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRDui [[COPY1]], [[COPY2]], 0 :: (store 8) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d1 + %0:fpr(<8 x s8>) = COPY $d0 + G_STORE %0(<8 x s8>), %1(p0) :: (store 8) + $noreg = PATCHABLE_RET + +... 
+--- +name: test_rule22_id2238_at_idx1505 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%0' } + - { reg: '$d1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule22_id2238_at_idx1505 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRDui [[COPY1]], [[COPY2]], 0 :: (store 8) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d1 + %0:fpr(<4 x s16>) = COPY $d0 + G_STORE %0(<4 x s16>), %1(p0) :: (store 8) + $noreg = PATCHABLE_RET + +... +--- +name: test_rule27_id2243_at_idx1781 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%0' } + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule27_id2243_at_idx1781 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d0 + %0:fpr(<4 x s32>) = COPY $q0 + G_STORE %0(<4 x s32>), %1(p0) :: (store 16) + $noreg = PATCHABLE_RET + +... 
+--- +name: test_rule28_id2244_at_idx1837 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%0' } + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule28_id2244_at_idx1837 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d0 + %0:fpr(<2 x s64>) = COPY $q0 + G_STORE %0(<2 x s64>), %1(p0) :: (store 16) + $noreg = PATCHABLE_RET + +... +--- +name: test_rule29_id2245_at_idx1893 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%0' } + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule29_id2245_at_idx1893 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d0 + %0:fpr(<16 x s8>) = COPY $q0 + G_STORE %0(<16 x s8>), %1(p0) :: (store 16) + $noreg = PATCHABLE_RET + +... 
+--- +name: test_rule30_id2246_at_idx1949 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%0' } + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule30_id2246_at_idx1949 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d0 + %0:fpr(<8 x s16>) = COPY $q0 + G_STORE %0(<8 x s16>), %1(p0) :: (store 16) + $noreg = PATCHABLE_RET + +... +--- +name: test_rule34_id2250_at_idx2173 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%0' } + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule34_id2250_at_idx2173 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: $noreg = PATCHABLE_RET + %1:fpr(p0) = COPY $d0 + %0:fpr(s128) = COPY $q0 + G_STORE %0(s128), %1(p0) :: (store 16) + $noreg = PATCHABLE_RET + +... +# The rules that generated this test has changed. 
The generator should be rerun +--- +name: test_rule92_id2150_at_idx7770 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } +liveins: + - { reg: '$x0', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: test_rule92_id2150_at_idx7770 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: $noreg = PATCHABLE_RET [[LDRBBui]] + %2:gpr(p0) = COPY $x0 + %0:gpr(s32) = G_LOAD %2(p0) :: (load 1) + $noreg = PATCHABLE_RET %0(s32) + +... +# The rules that generated this test has changed. The generator should be rerun +--- +name: test_rule96_id2146_at_idx8070 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } +liveins: + - { reg: '$x0', virtual-reg: '%2' } +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: test_rule96_id2146_at_idx8070 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1) + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[LDRBui]] + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY2]], 0, 0 + ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]] + %2:gpr(p0) = COPY $x0 + %0:fpr(s1) = G_LOAD %2(p0) :: (load 1) + %1:gpr(s32) = G_ZEXT %0(s1) + $noreg = PATCHABLE_RET %1(s32) + +... 
+--- +name: test_rule129_id2130_at_idx10828 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule129_id2130_at_idx10828 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load 8) + ; CHECK: $noreg = PATCHABLE_RET [[LDRDui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(<8 x s8>) = G_LOAD %1(p0) :: (load 8) + $noreg = PATCHABLE_RET %0(<8 x s8>) + +... +--- +name: test_rule130_id2131_at_idx10884 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule130_id2131_at_idx10884 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load 8) + ; CHECK: $noreg = PATCHABLE_RET [[LDRDui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(<4 x s16>) = G_LOAD %1(p0) :: (load 8) + $noreg = PATCHABLE_RET %0(<4 x s16>) + +... 
+--- +name: test_rule135_id2136_at_idx11160 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule135_id2136_at_idx11160 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(<4 x s32>) = G_LOAD %1(p0) :: (load 16) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule136_id2137_at_idx11216 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule136_id2137_at_idx11216 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(<2 x s64>) = G_LOAD %1(p0) :: (load 16) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... 
+--- +name: test_rule137_id2138_at_idx11272 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule137_id2138_at_idx11272 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(<16 x s8>) = G_LOAD %1(p0) :: (load 16) + $noreg = PATCHABLE_RET %0(<16 x s8>) + +... +--- +name: test_rule138_id2139_at_idx11328 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule138_id2139_at_idx11328 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] + %1:fpr(p0) = COPY $d0 + %0:fpr(<8 x s16>) = G_LOAD %1(p0) :: (load 16) + $noreg = PATCHABLE_RET %0(<8 x s16>) + +... 
+--- +name: test_rule339_id2369_at_idx26608 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%3' } + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$s2', virtual-reg: '%5' } +body: | + bb.0.entry: + liveins: $s0, $s1, $s2 + + ; CHECK-LABEL: name: test_rule339_id2369_at_idx26608 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY1]], [[COPY2]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMADDSrrr]] + %5:fpr(s32) = COPY $s2 + %4:fpr(s32) = COPY $s1 + %3:fpr(s32) = COPY $s0 + %1:fpr(s32) = G_FNEG %5 + %0:fpr(s32) = G_FNEG %4 + %2:fpr(s32) = G_FMA %0, %3, %1 + $noreg = PATCHABLE_RET %2(s32) + +... 
+--- +name: test_rule340_id2370_at_idx26714 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } + - { reg: '$d2', virtual-reg: '%5' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule340_id2370_at_idx26714 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY1]], [[COPY2]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMADDDrrr]] + %5:fpr(s64) = COPY $d2 + %4:fpr(s64) = COPY $d1 + %3:fpr(s64) = COPY $d0 + %1:fpr(s64) = G_FNEG %5 + %0:fpr(s64) = G_FNEG %4 + %2:fpr(s64) = G_FMA %0, %3, %1 + $noreg = PATCHABLE_RET %2(s64) + +... 
+--- +name: test_rule341_id2371_at_idx26820 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%3' } + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$s2', virtual-reg: '%5' } +body: | + bb.0.entry: + liveins: $s0, $s1, $s2 + + ; CHECK-LABEL: name: test_rule341_id2371_at_idx26820 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMADDSrrr]] + %5:fpr(s32) = COPY $s2 + %4:fpr(s32) = COPY $s1 + %3:fpr(s32) = COPY $s0 + %1:fpr(s32) = G_FNEG %5 + %0:fpr(s32) = G_FNEG %4 + %2:fpr(s32) = G_FMA %3, %0, %1 + $noreg = PATCHABLE_RET %2(s32) + +... 
+--- +name: test_rule342_id2372_at_idx26926 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } + - { reg: '$d2', virtual-reg: '%5' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule342_id2372_at_idx26926 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMADDDrrr]] + %5:fpr(s64) = COPY $d2 + %4:fpr(s64) = COPY $d1 + %3:fpr(s64) = COPY $d0 + %1:fpr(s64) = G_FNEG %5 + %0:fpr(s64) = G_FNEG %4 + %2:fpr(s64) = G_FMA %3, %0, %1 + $noreg = PATCHABLE_RET %2(s64) + +... +--- +name: test_rule343_id1266_at_idx27032 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule343_id1266_at_idx27032 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SADDLv8i8_v8i16_:%[0-9]+]]:fpr128 = SADDLv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDLv8i8_v8i16_]] + %4:fpr(<8 x s8>) = COPY $d1 + %3:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s16>) = G_SEXT %4(<8 x s8>) + %0:fpr(<8 x s16>) = G_SEXT %3(<8 x s8>) + %2:fpr(<8 x s16>) = G_ADD %0, %1 + $noreg = PATCHABLE_RET %2(<8 x s16>) + +... 
+--- +name: test_rule344_id1268_at_idx27128 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule344_id1268_at_idx27128 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SADDLv4i16_v4i32_:%[0-9]+]]:fpr128 = SADDLv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDLv4i16_v4i32_]] + %4:fpr(<4 x s16>) = COPY $d1 + %3:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s32>) = G_SEXT %4(<4 x s16>) + %0:fpr(<4 x s32>) = G_SEXT %3(<4 x s16>) + %2:fpr(<4 x s32>) = G_ADD %0, %1 + $noreg = PATCHABLE_RET %2(<4 x s32>) + +... +--- +name: test_rule345_id1270_at_idx27224 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule345_id1270_at_idx27224 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SADDLv2i32_v2i64_:%[0-9]+]]:fpr128 = SADDLv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDLv2i32_v2i64_]] + %4:fpr(<2 x s32>) = COPY $d1 + %3:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s64>) = G_SEXT %4(<2 x s32>) + %0:fpr(<2 x s64>) = G_SEXT %3(<2 x s32>) + %2:fpr(<2 x s64>) = G_ADD %0, %1 + $noreg = PATCHABLE_RET %2(<2 x s64>) + +... 
+--- +name: test_rule346_id1326_at_idx27320 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule346_id1326_at_idx27320 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[UADDLv8i8_v8i16_:%[0-9]+]]:fpr128 = UADDLv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDLv8i8_v8i16_]] + %4:fpr(<8 x s8>) = COPY $d1 + %3:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s16>) = G_ZEXT %4(<8 x s8>) + %0:fpr(<8 x s16>) = G_ZEXT %3(<8 x s8>) + %2:fpr(<8 x s16>) = G_ADD %0, %1 + $noreg = PATCHABLE_RET %2(<8 x s16>) + +... +--- +name: test_rule347_id1328_at_idx27416 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule347_id1328_at_idx27416 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[UADDLv4i16_v4i32_:%[0-9]+]]:fpr128 = UADDLv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDLv4i16_v4i32_]] + %4:fpr(<4 x s16>) = COPY $d1 + %3:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s32>) = G_ZEXT %4(<4 x s16>) + %0:fpr(<4 x s32>) = G_ZEXT %3(<4 x s16>) + %2:fpr(<4 x s32>) = G_ADD %0, %1 + $noreg = PATCHABLE_RET %2(<4 x s32>) + +... 
+--- +name: test_rule348_id1330_at_idx27512 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule348_id1330_at_idx27512 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[UADDLv2i32_v2i64_:%[0-9]+]]:fpr128 = UADDLv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDLv2i32_v2i64_]] + %4:fpr(<2 x s32>) = COPY $d1 + %3:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s64>) = G_ZEXT %4(<2 x s32>) + %0:fpr(<2 x s64>) = G_ZEXT %3(<2 x s32>) + %2:fpr(<2 x s64>) = G_ADD %0, %1 + $noreg = PATCHABLE_RET %2(<2 x s64>) + +... +--- +name: test_rule349_id1308_at_idx27608 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule349_id1308_at_idx27608 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SSUBLv8i8_v8i16_:%[0-9]+]]:fpr128 = SSUBLv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SSUBLv8i8_v8i16_]] + %4:fpr(<8 x s8>) = COPY $d1 + %3:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s16>) = G_SEXT %4(<8 x s8>) + %0:fpr(<8 x s16>) = G_SEXT %3(<8 x s8>) + %2:fpr(<8 x s16>) = G_SUB %0, %1 + $noreg = PATCHABLE_RET %2(<8 x s16>) + +... 
+--- +name: test_rule350_id1310_at_idx27704 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule350_id1310_at_idx27704 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SSUBLv4i16_v4i32_:%[0-9]+]]:fpr128 = SSUBLv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SSUBLv4i16_v4i32_]] + %4:fpr(<4 x s16>) = COPY $d1 + %3:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s32>) = G_SEXT %4(<4 x s16>) + %0:fpr(<4 x s32>) = G_SEXT %3(<4 x s16>) + %2:fpr(<4 x s32>) = G_SUB %0, %1 + $noreg = PATCHABLE_RET %2(<4 x s32>) + +... +--- +name: test_rule351_id1312_at_idx27800 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule351_id1312_at_idx27800 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SSUBLv2i32_v2i64_:%[0-9]+]]:fpr128 = SSUBLv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SSUBLv2i32_v2i64_]] + %4:fpr(<2 x s32>) = COPY $d1 + %3:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s64>) = G_SEXT %4(<2 x s32>) + %0:fpr(<2 x s64>) = G_SEXT %3(<2 x s32>) + %2:fpr(<2 x s64>) = G_SUB %0, %1 + $noreg = PATCHABLE_RET %2(<2 x s64>) + +... 
+--- +name: test_rule352_id1356_at_idx27896 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule352_id1356_at_idx27896 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[USUBLv8i8_v8i16_:%[0-9]+]]:fpr128 = USUBLv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[USUBLv8i8_v8i16_]] + %4:fpr(<8 x s8>) = COPY $d1 + %3:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s16>) = G_ZEXT %4(<8 x s8>) + %0:fpr(<8 x s16>) = G_ZEXT %3(<8 x s8>) + %2:fpr(<8 x s16>) = G_SUB %0, %1 + $noreg = PATCHABLE_RET %2(<8 x s16>) + +... +--- +name: test_rule353_id1358_at_idx27992 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule353_id1358_at_idx27992 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[USUBLv4i16_v4i32_:%[0-9]+]]:fpr128 = USUBLv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[USUBLv4i16_v4i32_]] + %4:fpr(<4 x s16>) = COPY $d1 + %3:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s32>) = G_ZEXT %4(<4 x s16>) + %0:fpr(<4 x s32>) = G_ZEXT %3(<4 x s16>) + %2:fpr(<4 x s32>) = G_SUB %0, %1 + $noreg = PATCHABLE_RET %2(<4 x s32>) + +... 
+--- +name: test_rule354_id1360_at_idx28088 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%3' } + - { reg: '$d1', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule354_id1360_at_idx28088 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[USUBLv2i32_v2i64_:%[0-9]+]]:fpr128 = USUBLv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[USUBLv2i32_v2i64_]] + %4:fpr(<2 x s32>) = COPY $d1 + %3:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s64>) = G_ZEXT %4(<2 x s32>) + %0:fpr(<2 x s64>) = G_ZEXT %3(<2 x s32>) + %2:fpr(<2 x s64>) = G_SUB %0, %1 + $noreg = PATCHABLE_RET %2(<2 x s64>) + +... +--- +name: test_rule928_id2367_at_idx60019 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%2' } + - { reg: '$s1', virtual-reg: '%3' } + - { reg: '$s2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $s0, $s1, $s2 + + ; CHECK-LABEL: name: test_rule928_id2367_at_idx60019 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FMSUBSrrr:%[0-9]+]]:fpr32 = FMSUBSrrr [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK: $noreg = PATCHABLE_RET [[FMSUBSrrr]] + %4:fpr(s32) = COPY $s2 + %3:fpr(s32) = COPY $s1 + %2:fpr(s32) = COPY $s0 + %0:fpr(s32) = G_FNEG %4 + %1:fpr(s32) = G_FMA %0, %2, %3 + $noreg = PATCHABLE_RET %1(s32) + +... 
+--- +name: test_rule929_id2368_at_idx60105 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule929_id2368_at_idx60105 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FMSUBDrrr:%[0-9]+]]:fpr64 = FMSUBDrrr [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK: $noreg = PATCHABLE_RET [[FMSUBDrrr]] + %4:fpr(s64) = COPY $d2 + %3:fpr(s64) = COPY $d1 + %2:fpr(s64) = COPY $d0 + %0:fpr(s64) = G_FNEG %4 + %1:fpr(s64) = G_FMA %0, %2, %3 + $noreg = PATCHABLE_RET %1(s64) + +... +--- +name: test_rule930_id2446_at_idx60191 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule930_id2446_at_idx60191 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f32_]] + %4:fpr(<2 x s32>) = COPY $d2 + %3:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_FNEG %4 + %1:fpr(<2 x s32>) = G_FMA %0, %2, %3 + $noreg = PATCHABLE_RET %1(<2 x s32>) + +... 
+--- +name: test_rule931_id2447_at_idx60277 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule931_id2447_at_idx60277 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLSv4f32_]] + %4:fpr(<4 x s32>) = COPY $q2 + %3:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_FNEG %4 + %1:fpr(<4 x s32>) = G_FMA %0, %2, %3 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... 
+--- +name: test_rule932_id2448_at_idx60363 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule932_id2448_at_idx60363 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f64_]] + %4:fpr(<2 x s64>) = COPY $q2 + %3:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_FNEG %4 + %1:fpr(<2 x s64>) = G_FMA %0, %2, %3 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... +--- +name: test_rule934_id429_at_idx60537 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%2' } + - { reg: '$s1', virtual-reg: '%3' } + - { reg: '$s2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $s0, $s1, $s2 + + ; CHECK-LABEL: name: test_rule934_id429_at_idx60537 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FMSUBSrrr:%[0-9]+]]:fpr32 = FMSUBSrrr [[COPY2]], [[COPY]], [[COPY1]] + ; CHECK: $noreg = PATCHABLE_RET [[FMSUBSrrr]] + %4:fpr(s32) = COPY $s2 + %3:fpr(s32) = COPY $s1 + %2:fpr(s32) = COPY $s0 + %0:fpr(s32) = G_FNEG %4 + %1:fpr(s32) = G_FMA %2, %0, %3 + $noreg = PATCHABLE_RET %1(s32) + +... 
+--- +name: test_rule935_id430_at_idx60625 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule935_id430_at_idx60625 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FMSUBDrrr:%[0-9]+]]:fpr64 = FMSUBDrrr [[COPY2]], [[COPY]], [[COPY1]] + ; CHECK: $noreg = PATCHABLE_RET [[FMSUBDrrr]] + %4:fpr(s64) = COPY $d2 + %3:fpr(s64) = COPY $d1 + %2:fpr(s64) = COPY $d0 + %0:fpr(s64) = G_FNEG %4 + %1:fpr(s64) = G_FMA %2, %0, %3 + $noreg = PATCHABLE_RET %1(s64) + +... +--- +name: test_rule938_id899_at_idx60889 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule938_id899_at_idx60889 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY2]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f32_]] + %4:fpr(<2 x s32>) = COPY $d2 + %3:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_FNEG %4 + %1:fpr(<2 x s32>) = G_FMA %2, %0, %3 + $noreg = PATCHABLE_RET %1(<2 x s32>) + +... 
+--- +name: test_rule939_id900_at_idx60977 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule939_id900_at_idx60977 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY2]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLSv4f32_]] + %4:fpr(<4 x s32>) = COPY $q2 + %3:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_FNEG %4 + %1:fpr(<4 x s32>) = G_FMA %2, %0, %3 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... 
+--- +name: test_rule940_id901_at_idx61065 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule940_id901_at_idx61065 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY2]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f64_]] + %4:fpr(<2 x s64>) = COPY $q2 + %3:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_FNEG %4 + %1:fpr(<2 x s64>) = G_FMA %2, %0, %3 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... +--- +name: test_rule942_id435_at_idx61241 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%2' } + - { reg: '$s1', virtual-reg: '%3' } + - { reg: '$s2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $s0, $s1, $s2 + + ; CHECK-LABEL: name: test_rule942_id435_at_idx61241 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FNMSUBSrrr:%[0-9]+]]:fpr32 = FNMSUBSrrr [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMSUBSrrr]] + %4:fpr(s32) = COPY $s2 + %3:fpr(s32) = COPY $s1 + %2:fpr(s32) = COPY $s0 + %0:fpr(s32) = G_FNEG %4 + %1:fpr(s32) = G_FMA %2, %3, %0 + $noreg = PATCHABLE_RET %1(s32) + +... 
+--- +name: test_rule943_id436_at_idx61329 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule943_id436_at_idx61329 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FNMSUBDrrr:%[0-9]+]]:fpr64 = FNMSUBDrrr [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMSUBDrrr]] + %4:fpr(s64) = COPY $d2 + %3:fpr(s64) = COPY $d1 + %2:fpr(s64) = COPY $d0 + %0:fpr(s64) = G_FNEG %4 + %1:fpr(s64) = G_FMA %2, %3, %0 + $noreg = PATCHABLE_RET %1(s64) + +... +--- +name: test_rule944_id3803_at_idx61417 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule944_id3803_at_idx61417 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[MLAv8i8_:%[0-9]+]]:fpr64 = MLAv8i8 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i8_]] + %4:fpr(<8 x s8>) = COPY $d2 + %3:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = COPY $d0 + %0:fpr(<8 x s8>) = G_MUL %3, %4 + %1:fpr(<8 x s8>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<8 x s8>) + +... 
+--- +name: test_rule945_id3804_at_idx61505 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule945_id3804_at_idx61505 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[MLAv16i8_:%[0-9]+]]:fpr128 = MLAv16i8 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv16i8_]] + %4:fpr(<16 x s8>) = COPY $q2 + %3:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = COPY $q0 + %0:fpr(<16 x s8>) = G_MUL %3, %4 + %1:fpr(<16 x s8>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<16 x s8>) + +... 
+--- +name: test_rule946_id3805_at_idx61593 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule946_id3805_at_idx61593 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[MLAv4i16_:%[0-9]+]]:fpr64 = MLAv4i16 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv4i16_]] + %4:fpr(<4 x s16>) = COPY $d2 + %3:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = COPY $d0 + %0:fpr(<4 x s16>) = G_MUL %3, %4 + %1:fpr(<4 x s16>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<4 x s16>) + +... 
+--- +name: test_rule947_id3806_at_idx61681 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule947_id3806_at_idx61681 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[MLAv8i16_:%[0-9]+]]:fpr128 = MLAv8i16 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i16_]] + %4:fpr(<8 x s16>) = COPY $q2 + %3:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_MUL %3, %4 + %1:fpr(<8 x s16>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... +--- +name: test_rule950_id3869_at_idx61945 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule950_id3869_at_idx61945 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SADDWv8i8_v8i16_:%[0-9]+]]:fpr128 = SADDWv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDWv8i8_v8i16_]] + %3:fpr(<8 x s8>) = COPY $d0 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_SEXT %3(<8 x s8>) + %1:fpr(<8 x s16>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... 
+--- +name: test_rule951_id3871_at_idx62021 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule951_id3871_at_idx62021 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SADDWv4i16_v4i32_:%[0-9]+]]:fpr128 = SADDWv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDWv4i16_v4i32_]] + %3:fpr(<4 x s16>) = COPY $d0 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_SEXT %3(<4 x s16>) + %1:fpr(<4 x s32>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... +--- +name: test_rule952_id3873_at_idx62097 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule952_id3873_at_idx62097 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SADDWv2i32_v2i64_:%[0-9]+]]:fpr128 = SADDWv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDWv2i32_v2i64_]] + %3:fpr(<2 x s32>) = COPY $d0 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_SEXT %3(<2 x s32>) + %1:fpr(<2 x s64>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... 
+--- +name: test_rule953_id3887_at_idx62173 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule953_id3887_at_idx62173 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UADDWv8i8_v8i16_:%[0-9]+]]:fpr128 = UADDWv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDWv8i8_v8i16_]] + %3:fpr(<8 x s8>) = COPY $d0 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_ZEXT %3(<8 x s8>) + %1:fpr(<8 x s16>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... +--- +name: test_rule954_id3889_at_idx62249 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule954_id3889_at_idx62249 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UADDWv4i16_v4i32_:%[0-9]+]]:fpr128 = UADDWv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDWv4i16_v4i32_]] + %3:fpr(<4 x s16>) = COPY $d0 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_ZEXT %3(<4 x s16>) + %1:fpr(<4 x s32>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... 
+--- +name: test_rule955_id3891_at_idx62325 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule955_id3891_at_idx62325 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UADDWv2i32_v2i64_:%[0-9]+]]:fpr128 = UADDWv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDWv2i32_v2i64_]] + %3:fpr(<2 x s32>) = COPY $d0 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_ZEXT %3(<2 x s32>) + %1:fpr(<2 x s64>) = G_ADD %0, %2 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... +--- +name: test_rule956_id927_at_idx62401 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule956_id927_at_idx62401 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[MLAv8i8_:%[0-9]+]]:fpr64 = MLAv8i8 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i8_]] + %4:fpr(<8 x s8>) = COPY $d2 + %3:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = COPY $d0 + %0:fpr(<8 x s8>) = G_MUL %3, %4 + %1:fpr(<8 x s8>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s8>) + +... 
+--- +name: test_rule957_id928_at_idx62489 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule957_id928_at_idx62489 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[MLAv16i8_:%[0-9]+]]:fpr128 = MLAv16i8 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv16i8_]] + %4:fpr(<16 x s8>) = COPY $q2 + %3:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = COPY $q0 + %0:fpr(<16 x s8>) = G_MUL %3, %4 + %1:fpr(<16 x s8>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<16 x s8>) + +... 
+--- +name: test_rule958_id929_at_idx62577 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule958_id929_at_idx62577 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[MLAv4i16_:%[0-9]+]]:fpr64 = MLAv4i16 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv4i16_]] + %4:fpr(<4 x s16>) = COPY $d2 + %3:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = COPY $d0 + %0:fpr(<4 x s16>) = G_MUL %3, %4 + %1:fpr(<4 x s16>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<4 x s16>) + +... +--- +name: test_rule959_id930_at_idx62665 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule959_id930_at_idx62665 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[MLAv8i16_:%[0-9]+]]:fpr128 = MLAv8i16 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i16_]] + %4:fpr(<8 x s16>) = COPY $q2 + %3:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_MUL %3, %4 + %1:fpr(<8 x s16>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s16>) 
+ +... +--- +name: test_rule962_id1272_at_idx62929 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule962_id1272_at_idx62929 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SADDWv8i8_v8i16_:%[0-9]+]]:fpr128 = SADDWv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDWv8i8_v8i16_]] + %3:fpr(<8 x s8>) = COPY $d0 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_SEXT %3(<8 x s8>) + %1:fpr(<8 x s16>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... +--- +name: test_rule963_id1274_at_idx63005 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule963_id1274_at_idx63005 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SADDWv4i16_v4i32_:%[0-9]+]]:fpr128 = SADDWv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDWv4i16_v4i32_]] + %3:fpr(<4 x s16>) = COPY $d0 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_SEXT %3(<4 x s16>) + %1:fpr(<4 x s32>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... 
+--- +name: test_rule964_id1276_at_idx63081 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule964_id1276_at_idx63081 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SADDWv2i32_v2i64_:%[0-9]+]]:fpr128 = SADDWv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SADDWv2i32_v2i64_]] + %3:fpr(<2 x s32>) = COPY $d0 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_SEXT %3(<2 x s32>) + %1:fpr(<2 x s64>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... +--- +name: test_rule965_id1332_at_idx63157 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule965_id1332_at_idx63157 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UADDWv8i8_v8i16_:%[0-9]+]]:fpr128 = UADDWv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDWv8i8_v8i16_]] + %3:fpr(<8 x s8>) = COPY $d0 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_ZEXT %3(<8 x s8>) + %1:fpr(<8 x s16>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... 
+--- +name: test_rule966_id1334_at_idx63233 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule966_id1334_at_idx63233 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UADDWv4i16_v4i32_:%[0-9]+]]:fpr128 = UADDWv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDWv4i16_v4i32_]] + %3:fpr(<4 x s16>) = COPY $d0 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_ZEXT %3(<4 x s16>) + %1:fpr(<4 x s32>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... +--- +name: test_rule967_id1336_at_idx63309 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule967_id1336_at_idx63309 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UADDWv2i32_v2i64_:%[0-9]+]]:fpr128 = UADDWv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UADDWv2i32_v2i64_]] + %3:fpr(<2 x s32>) = COPY $d0 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_ZEXT %3(<2 x s32>) + %1:fpr(<2 x s64>) = G_ADD %2, %0 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... 
+--- +name: test_rule977_id933_at_idx64051 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule977_id933_at_idx64051 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[MLSv8i8_:%[0-9]+]]:fpr64 = MLSv8i8 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLSv8i8_]] + %4:fpr(<8 x s8>) = COPY $d2 + %3:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = COPY $d0 + %0:fpr(<8 x s8>) = G_MUL %3, %4 + %1:fpr(<8 x s8>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s8>) + +... +--- +name: test_rule978_id934_at_idx64139 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule978_id934_at_idx64139 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[MLSv16i8_:%[0-9]+]]:fpr128 = MLSv16i8 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLSv16i8_]] + %4:fpr(<16 x s8>) = COPY $q2 + %3:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = COPY $q0 + %0:fpr(<16 x s8>) = G_MUL %3, %4 + %1:fpr(<16 x s8>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<16 x s8>) + +... 
+--- +name: test_rule979_id935_at_idx64227 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule979_id935_at_idx64227 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[MLSv4i16_:%[0-9]+]]:fpr64 = MLSv4i16 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLSv4i16_]] + %4:fpr(<4 x s16>) = COPY $d2 + %3:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = COPY $d0 + %0:fpr(<4 x s16>) = G_MUL %3, %4 + %1:fpr(<4 x s16>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<4 x s16>) + +... +--- +name: test_rule980_id936_at_idx64315 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$q1', virtual-reg: '%3' } + - { reg: '$q2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule980_id936_at_idx64315 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[MLSv8i16_:%[0-9]+]]:fpr128 = MLSv8i16 [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[MLSv8i16_]] + %4:fpr(<8 x s16>) = COPY $q2 + %3:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_MUL %3, %4 + %1:fpr(<8 x s16>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s16>) 
+ +... +--- +name: test_rule983_id1314_at_idx64579 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule983_id1314_at_idx64579 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SSUBWv8i8_v8i16_:%[0-9]+]]:fpr128 = SSUBWv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SSUBWv8i8_v8i16_]] + %3:fpr(<8 x s8>) = COPY $d0 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_SEXT %3(<8 x s8>) + %1:fpr(<8 x s16>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... +--- +name: test_rule984_id1316_at_idx64655 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule984_id1316_at_idx64655 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SSUBWv4i16_v4i32_:%[0-9]+]]:fpr128 = SSUBWv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SSUBWv4i16_v4i32_]] + %3:fpr(<4 x s16>) = COPY $d0 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_SEXT %3(<4 x s16>) + %1:fpr(<4 x s32>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... 
+--- +name: test_rule985_id1318_at_idx64731 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule985_id1318_at_idx64731 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SSUBWv2i32_v2i64_:%[0-9]+]]:fpr128 = SSUBWv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SSUBWv2i32_v2i64_]] + %3:fpr(<2 x s32>) = COPY $d0 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_SEXT %3(<2 x s32>) + %1:fpr(<2 x s64>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... +--- +name: test_rule986_id1362_at_idx64807 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule986_id1362_at_idx64807 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[USUBWv8i8_v8i16_:%[0-9]+]]:fpr128 = USUBWv8i8_v8i16 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[USUBWv8i8_v8i16_]] + %3:fpr(<8 x s8>) = COPY $d0 + %2:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s16>) = G_ZEXT %3(<8 x s8>) + %1:fpr(<8 x s16>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<8 x s16>) + +... 
+--- +name: test_rule987_id1364_at_idx64883 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule987_id1364_at_idx64883 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[USUBWv4i16_v4i32_:%[0-9]+]]:fpr128 = USUBWv4i16_v4i32 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[USUBWv4i16_v4i32_]] + %3:fpr(<4 x s16>) = COPY $d0 + %2:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_ZEXT %3(<4 x s16>) + %1:fpr(<4 x s32>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<4 x s32>) + +... +--- +name: test_rule988_id1366_at_idx64959 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%2' } + - { reg: '$d0', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $d0 + + ; CHECK-LABEL: name: test_rule988_id1366_at_idx64959 + ; CHECK: liveins: $q0, $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[USUBWv2i32_v2i64_:%[0-9]+]]:fpr128 = USUBWv2i32_v2i64 [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[USUBWv2i32_v2i64_]] + %3:fpr(<2 x s32>) = COPY $d0 + %2:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_ZEXT %3(<2 x s32>) + %1:fpr(<2 x s64>) = G_SUB %2, %0 + $noreg = PATCHABLE_RET %1(<2 x s64>) + +... 
+--- +name: test_rule990_id432_at_idx65123 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%2' } + - { reg: '$s1', virtual-reg: '%3' } + - { reg: '$s2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $s0, $s1, $s2 + + ; CHECK-LABEL: name: test_rule990_id432_at_idx65123 + ; CHECK: liveins: $s0, $s1, $s2 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMADDSrrr]] + %4:fpr(s32) = COPY $s2 + %3:fpr(s32) = COPY $s1 + %2:fpr(s32) = COPY $s0 + %0:fpr(s32) = G_FMA %2, %3, %4 + %1:fpr(s32) = G_FNEG %0 + $noreg = PATCHABLE_RET %1(s32) + +... +--- +name: test_rule991_id433_at_idx65211 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } + - { reg: '$d2', virtual-reg: '%4' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule991_id433_at_idx65211 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY2]], [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMADDDrrr]] + %4:fpr(s64) = COPY $d2 + %3:fpr(s64) = COPY $d1 + %2:fpr(s64) = COPY $d0 + %0:fpr(s64) = G_FMA %2, %3, %4 + %1:fpr(s64) = G_FNEG %0 + $noreg = PATCHABLE_RET %1(s64) + +... 
+--- +name: test_rule993_id420_at_idx65375 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$s0', virtual-reg: '%2' } + - { reg: '$s1', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $s0, $s1 + + ; CHECK-LABEL: name: test_rule993_id420_at_idx65375 + ; CHECK: liveins: $s0, $s1 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FNMULSrr:%[0-9]+]]:fpr32 = FNMULSrr [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMULSrr]] + %3:fpr(s32) = COPY $s1 + %2:fpr(s32) = COPY $s0 + %0:fpr(s32) = G_FMUL %2, %3 + %1:fpr(s32) = G_FNEG %0 + $noreg = PATCHABLE_RET %1(s32) + +... +--- +name: test_rule994_id421_at_idx65451 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%2' } + - { reg: '$d1', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: test_rule994_id421_at_idx65451 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FNMULDrr:%[0-9]+]]:fpr64 = FNMULDrr [[COPY1]], [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNMULDrr]] + %3:fpr(s64) = COPY $d1 + %2:fpr(s64) = COPY $d0 + %0:fpr(s64) = G_FMUL %2, %3 + %1:fpr(s64) = G_FNEG %0 + $noreg = PATCHABLE_RET %1(s64) + +... 
+--- +name: test_rule1230_id2969_at_idx81784 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_rule1230_id2969_at_idx81784 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY $x0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[COPY1]] + ; CHECK: ST1Onev8b [[COPY2]], [[COPY]] :: (store 8) + ; CHECK: $noreg = PATCHABLE_RET + %1:gpr(p0) = COPY $x1 + %0:gpr(<8 x s8>) = COPY $x0 + G_STORE %0(<8 x s8>), %1(p0) :: (store 8) + $noreg = PATCHABLE_RET + +... +--- +name: test_rule1231_id2970_at_idx81816 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_rule1231_id2970_at_idx81816 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY $x0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[COPY1]] + ; CHECK: ST1Onev4h [[COPY2]], [[COPY]] :: (store 8) + ; CHECK: $noreg = PATCHABLE_RET + %1:gpr(p0) = COPY $x1 + %0:gpr(<4 x s16>) = COPY $x0 + G_STORE %0(<4 x s16>), %1(p0) :: (store 8) + $noreg = PATCHABLE_RET + +... 
+--- +name: test_rule1239_id894_at_idx82201 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } + - { reg: '$d1', virtual-reg: '%2' } + - { reg: '$d2', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $d0, $d1, $d2 + + ; CHECK-LABEL: name: test_rule1239_id894_at_idx82201 + ; CHECK: liveins: $d0, $d1, $d2 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FMLAv2f32_:%[0-9]+]]:fpr64 = FMLAv2f32 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLAv2f32_]] + %3:fpr(<2 x s32>) = COPY $d2 + %2:fpr(<2 x s32>) = COPY $d1 + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_FMA %1, %2, %3 + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... +--- +name: test_rule1240_id895_at_idx82269 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } + - { reg: '$q1', virtual-reg: '%2' } + - { reg: '$q2', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test_rule1240_id895_at_idx82269 + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FMLAv4f32_:%[0-9]+]]:fpr128 = FMLAv4f32 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[FMLAv4f32_]] + %3:fpr(<4 x s32>) = COPY $q2 + %2:fpr(<4 x s32>) = COPY $q1 + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_FMA %1, %2, %3 + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... 
+# G_FMA on <2 x s64>: the CHECK lines expect FMLAv2f64 on fpr128, with the
+# generic operands %1, %2, %3 appearing in the order %3, %2, %1.
+---
+name: test_rule1241_id896_at_idx82337
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+  - { id: 3, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+  - { reg: '$q2', virtual-reg: '%3' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1, $q2
+
+    ; CHECK-LABEL: name: test_rule1241_id896_at_idx82337
+    ; CHECK: liveins: $q0, $q1, $q2
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FMLAv2f64_:%[0-9]+]]:fpr128 = FMLAv2f64 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FMLAv2f64_]]
+    %3:fpr(<2 x s64>) = COPY $q2
+    %2:fpr(<2 x s64>) = COPY $q1
+    %1:fpr(<2 x s64>) = COPY $q0
+    %0:fpr(<2 x s64>) = G_FMA %1, %2, %3
+    $noreg = PATCHABLE_RET %0(<2 x s64>)
+
+...
+# G_ADD on <8 x s8>: the CHECK lines expect ADDv8i8 on fpr64.
+---
+name: test_rule1244_id751_at_idx82487
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1244_id751_at_idx82487
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[ADDv8i8_:%[0-9]+]]:fpr64 = ADDv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ADDv8i8_]]
+    %2:fpr(<8 x s8>) = COPY $d1
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s8>) = G_ADD %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s8>)
+
+...
+# G_ADD on <16 x s8>: the CHECK lines expect ADDv16i8 on fpr128.
+---
+name: test_rule1245_id752_at_idx82530
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1245_id752_at_idx82530
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[ADDv16i8_:%[0-9]+]]:fpr128 = ADDv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ADDv16i8_]]
+    %2:fpr(<16 x s8>) = COPY $q1
+    %1:fpr(<16 x s8>) = COPY $q0
+    %0:fpr(<16 x s8>) = G_ADD %1, %2
+    $noreg = PATCHABLE_RET %0(<16 x s8>)
+
+...
+# G_ADD on <4 x s16>: the CHECK lines expect ADDv4i16 on fpr64.
+---
+name: test_rule1246_id753_at_idx82573
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1246_id753_at_idx82573
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[ADDv4i16_:%[0-9]+]]:fpr64 = ADDv4i16 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ADDv4i16_]]
+    %2:fpr(<4 x s16>) = COPY $d1
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s16>) = G_ADD %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s16>)
+
+...
+# G_ADD on <8 x s16>: the CHECK lines expect ADDv8i16 on fpr128.
+---
+name: test_rule1247_id754_at_idx82616
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1247_id754_at_idx82616
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[ADDv8i16_:%[0-9]+]]:fpr128 = ADDv8i16 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ADDv8i16_]]
+    %2:fpr(<8 x s16>) = COPY $q1
+    %1:fpr(<8 x s16>) = COPY $q0
+    %0:fpr(<8 x s16>) = G_ADD %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_AND on <8 x s8>: the CHECK lines expect ANDv8i8 on fpr64.
+---
+name: test_rule1254_id1162_at_idx82913
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1254_id1162_at_idx82913
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ANDv8i8_]]
+    %2:fpr(<8 x s8>) = COPY $d1
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s8>) = G_AND %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s8>)
+
+...
+# G_AND on <16 x s8>: the CHECK lines expect ANDv16i8 on fpr128.
+---
+name: test_rule1255_id1163_at_idx82956
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1255_id1163_at_idx82956
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[ANDv16i8_:%[0-9]+]]:fpr128 = ANDv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ANDv16i8_]]
+    %2:fpr(<16 x s8>) = COPY $q1
+    %1:fpr(<16 x s8>) = COPY $q0
+    %0:fpr(<16 x s8>) = G_AND %1, %2
+    $noreg = PATCHABLE_RET %0(<16 x s8>)
+
+...
+# G_AND on <4 x s16>: the CHECK lines expect the byte-vector opcode ANDv8i8
+# on fpr64, the same opcode expected for the <8 x s8> case.
+---
+name: test_rule1256_id1751_at_idx82999
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1256_id1751_at_idx82999
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ANDv8i8_]]
+    %2:fpr(<4 x s16>) = COPY $d1
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s16>) = G_AND %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s16>)
+
+...
+# G_AND on <8 x s16>: the CHECK lines expect the byte-vector opcode ANDv16i8
+# on fpr128, the same opcode expected for the <16 x s8> case.
+---
+name: test_rule1259_id1754_at_idx83128
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1259_id1754_at_idx83128
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[ANDv16i8_:%[0-9]+]]:fpr128 = ANDv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ANDv16i8_]]
+    %2:fpr(<8 x s16>) = COPY $q1
+    %1:fpr(<8 x s16>) = COPY $q0
+    %0:fpr(<8 x s16>) = G_AND %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_FADD on <2 x s32>: the CHECK lines expect FADDv2f32 on fpr64.
+---
+name: test_rule1268_id829_at_idx83513
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1268_id829_at_idx83513
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FADDv2f32_]]
+    %2:fpr(<2 x s32>) = COPY $d1
+    %1:fpr(<2 x s32>) = COPY $d0
+    %0:fpr(<2 x s32>) = G_FADD %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s32>)
+
+...
+# G_FADD on <4 x s32>: the CHECK lines expect FADDv4f32 on fpr128.
+---
+name: test_rule1269_id830_at_idx83556
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1269_id830_at_idx83556
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FADDv4f32_:%[0-9]+]]:fpr128 = FADDv4f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FADDv4f32_]]
+    %2:fpr(<4 x s32>) = COPY $q1
+    %1:fpr(<4 x s32>) = COPY $q0
+    %0:fpr(<4 x s32>) = G_FADD %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s32>)
+
+...
+# G_FADD on <2 x s64>: the CHECK lines expect FADDv2f64 on fpr128.
+---
+name: test_rule1270_id831_at_idx83599
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1270_id831_at_idx83599
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FADDv2f64_:%[0-9]+]]:fpr128 = FADDv2f64 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FADDv2f64_]]
+    %2:fpr(<2 x s64>) = COPY $q1
+    %1:fpr(<2 x s64>) = COPY $q0
+    %0:fpr(<2 x s64>) = G_FADD %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s64>)
+
+...
+# G_FDIV on <2 x s32>: the CHECK lines expect FDIVv2f32 on fpr64.
+---
+name: test_rule1276_id849_at_idx83857
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1276_id849_at_idx83857
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FDIVv2f32_:%[0-9]+]]:fpr64 = FDIVv2f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FDIVv2f32_]]
+    %2:fpr(<2 x s32>) = COPY $d1
+    %1:fpr(<2 x s32>) = COPY $d0
+    %0:fpr(<2 x s32>) = G_FDIV %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s32>)
+
+...
+# G_FDIV on <4 x s32>: the CHECK lines expect FDIVv4f32 on fpr128.
+---
+name: test_rule1277_id850_at_idx83900
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1277_id850_at_idx83900
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FDIVv4f32_:%[0-9]+]]:fpr128 = FDIVv4f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FDIVv4f32_]]
+    %2:fpr(<4 x s32>) = COPY $q1
+    %1:fpr(<4 x s32>) = COPY $q0
+    %0:fpr(<4 x s32>) = G_FDIV %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s32>)
+
+...
+# G_FDIV on <2 x s64>: the CHECK lines expect FDIVv2f64 on fpr128.
+---
+name: test_rule1278_id851_at_idx83943
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1278_id851_at_idx83943
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FDIVv2f64_:%[0-9]+]]:fpr128 = FDIVv2f64 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FDIVv2f64_]]
+    %2:fpr(<2 x s64>) = COPY $q1
+    %1:fpr(<2 x s64>) = COPY $q0
+    %0:fpr(<2 x s64>) = G_FDIV %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s64>)
+
+...
+# G_FMUL on <2 x s32>: the CHECK lines expect FMULv2f32 on fpr64.
+---
+name: test_rule1284_id909_at_idx84201
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1284_id909_at_idx84201
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FMULv2f32_]]
+    %2:fpr(<2 x s32>) = COPY $d1
+    %1:fpr(<2 x s32>) = COPY $d0
+    %0:fpr(<2 x s32>) = G_FMUL %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s32>)
+
+...
+# G_FMUL on <4 x s32>: the CHECK lines expect FMULv4f32 on fpr128.
+---
+name: test_rule1285_id910_at_idx84244
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1285_id910_at_idx84244
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FMULv4f32_:%[0-9]+]]:fpr128 = FMULv4f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FMULv4f32_]]
+    %2:fpr(<4 x s32>) = COPY $q1
+    %1:fpr(<4 x s32>) = COPY $q0
+    %0:fpr(<4 x s32>) = G_FMUL %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s32>)
+
+...
+# G_FMUL on <2 x s64>: the CHECK lines expect FMULv2f64 on fpr128.
+---
+name: test_rule1286_id911_at_idx84287
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1286_id911_at_idx84287
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FMULv2f64_:%[0-9]+]]:fpr128 = FMULv2f64 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FMULv2f64_]]
+    %2:fpr(<2 x s64>) = COPY $q1
+    %1:fpr(<2 x s64>) = COPY $q0
+    %0:fpr(<2 x s64>) = G_FMUL %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s64>)
+
+...
+# G_FSUB on <2 x s32>: the CHECK lines expect FSUBv2f32 on fpr64.
+---
+name: test_rule1292_id924_at_idx84545
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1292_id924_at_idx84545
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FSUBv2f32_:%[0-9]+]]:fpr64 = FSUBv2f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FSUBv2f32_]]
+    %2:fpr(<2 x s32>) = COPY $d1
+    %1:fpr(<2 x s32>) = COPY $d0
+    %0:fpr(<2 x s32>) = G_FSUB %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s32>)
+
+...
+# G_FSUB on <4 x s32>: the CHECK lines expect FSUBv4f32 on fpr128.
+---
+name: test_rule1293_id925_at_idx84588
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1293_id925_at_idx84588
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FSUBv4f32_:%[0-9]+]]:fpr128 = FSUBv4f32 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FSUBv4f32_]]
+    %2:fpr(<4 x s32>) = COPY $q1
+    %1:fpr(<4 x s32>) = COPY $q0
+    %0:fpr(<4 x s32>) = G_FSUB %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s32>)
+
+...
+# G_FSUB on <2 x s64>: the CHECK lines expect FSUBv2f64 on fpr128.
+---
+name: test_rule1294_id926_at_idx84631
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1294_id926_at_idx84631
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FSUBv2f64_:%[0-9]+]]:fpr128 = FSUBv2f64 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FSUBv2f64_]]
+    %2:fpr(<2 x s64>) = COPY $q1
+    %1:fpr(<2 x s64>) = COPY $q0
+    %0:fpr(<2 x s64>) = G_FSUB %1, %2
+    $noreg = PATCHABLE_RET %0(<2 x s64>)
+
+...
+# G_MUL on <8 x s8>: the CHECK lines expect MULv8i8 on fpr64.
+---
+name: test_rule1296_id939_at_idx84715
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1296_id939_at_idx84715
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[MULv8i8_:%[0-9]+]]:fpr64 = MULv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[MULv8i8_]]
+    %2:fpr(<8 x s8>) = COPY $d1
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s8>) = G_MUL %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s8>)
+
+...
+# G_MUL on <16 x s8>: the CHECK lines expect MULv16i8 on fpr128.
+---
+name: test_rule1297_id940_at_idx84758
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1297_id940_at_idx84758
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[MULv16i8_:%[0-9]+]]:fpr128 = MULv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[MULv16i8_]]
+    %2:fpr(<16 x s8>) = COPY $q1
+    %1:fpr(<16 x s8>) = COPY $q0
+    %0:fpr(<16 x s8>) = G_MUL %1, %2
+    $noreg = PATCHABLE_RET %0(<16 x s8>)
+
+...
+# G_MUL on <4 x s16>: the CHECK lines expect MULv4i16 on fpr64.
+---
+name: test_rule1298_id941_at_idx84801
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1298_id941_at_idx84801
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[MULv4i16_:%[0-9]+]]:fpr64 = MULv4i16 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[MULv4i16_]]
+    %2:fpr(<4 x s16>) = COPY $d1
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s16>) = G_MUL %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s16>)
+
+...
+# G_MUL on <8 x s16>: the CHECK lines expect MULv8i16 on fpr128.
+---
+name: test_rule1299_id942_at_idx84844
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1299_id942_at_idx84844
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[MULv8i16_:%[0-9]+]]:fpr128 = MULv8i16 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[MULv8i16_]]
+    %2:fpr(<8 x s16>) = COPY $q1
+    %1:fpr(<8 x s16>) = COPY $q0
+    %0:fpr(<8 x s16>) = G_MUL %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_OR on <8 x s8>: the CHECK lines expect ORRv8i8 on fpr64.
+---
+name: test_rule1304_id1174_at_idx85055
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1304_id1174_at_idx85055
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[ORRv8i8_:%[0-9]+]]:fpr64 = ORRv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ORRv8i8_]]
+    %2:fpr(<8 x s8>) = COPY $d1
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s8>) = G_OR %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s8>)
+
+...
+# G_OR on <16 x s8>: the CHECK lines expect ORRv16i8 on fpr128.
+---
+name: test_rule1305_id1175_at_idx85098
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1305_id1175_at_idx85098
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[ORRv16i8_:%[0-9]+]]:fpr128 = ORRv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ORRv16i8_]]
+    %2:fpr(<16 x s8>) = COPY $q1
+    %1:fpr(<16 x s8>) = COPY $q0
+    %0:fpr(<16 x s8>) = G_OR %1, %2
+    $noreg = PATCHABLE_RET %0(<16 x s8>)
+
+...
+# G_OR on <4 x s16>: the CHECK lines expect the byte-vector opcode ORRv8i8
+# on fpr64, the same opcode expected for the <8 x s8> case.
+---
+name: test_rule1306_id1827_at_idx85141
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1306_id1827_at_idx85141
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[ORRv8i8_:%[0-9]+]]:fpr64 = ORRv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ORRv8i8_]]
+    %2:fpr(<4 x s16>) = COPY $d1
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s16>) = G_OR %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s16>)
+
+...
+# G_OR on <8 x s16>: the CHECK lines expect the byte-vector opcode ORRv16i8
+# on fpr128, the same opcode expected for the <16 x s8> case.
+---
+name: test_rule1309_id1830_at_idx85270
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1309_id1830_at_idx85270
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[ORRv16i8_:%[0-9]+]]:fpr128 = ORRv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[ORRv16i8_]]
+    %2:fpr(<8 x s16>) = COPY $q1
+    %1:fpr(<8 x s16>) = COPY $q0
+    %0:fpr(<8 x s16>) = G_OR %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_SUB on <8 x s8>: the CHECK lines expect SUBv8i8 on fpr64.
+---
+name: test_rule1315_id1051_at_idx85522
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1315_id1051_at_idx85522
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[SUBv8i8_:%[0-9]+]]:fpr64 = SUBv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[SUBv8i8_]]
+    %2:fpr(<8 x s8>) = COPY $d1
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s8>) = G_SUB %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s8>)
+
+...
+# G_SUB on <16 x s8>: the CHECK lines expect SUBv16i8 on fpr128.
+---
+name: test_rule1316_id1052_at_idx85565
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1316_id1052_at_idx85565
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SUBv16i8_:%[0-9]+]]:fpr128 = SUBv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[SUBv16i8_]]
+    %2:fpr(<16 x s8>) = COPY $q1
+    %1:fpr(<16 x s8>) = COPY $q0
+    %0:fpr(<16 x s8>) = G_SUB %1, %2
+    $noreg = PATCHABLE_RET %0(<16 x s8>)
+
+...
+# G_SUB on <4 x s16>: the CHECK lines expect SUBv4i16 on fpr64.
+---
+name: test_rule1317_id1053_at_idx85608
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1317_id1053_at_idx85608
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[SUBv4i16_:%[0-9]+]]:fpr64 = SUBv4i16 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[SUBv4i16_]]
+    %2:fpr(<4 x s16>) = COPY $d1
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s16>) = G_SUB %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s16>)
+
+...
+# G_SUB on <8 x s16>: the CHECK lines expect SUBv8i16 on fpr128.
+---
+name: test_rule1318_id1054_at_idx85651
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1318_id1054_at_idx85651
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SUBv8i16_:%[0-9]+]]:fpr128 = SUBv8i16 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[SUBv8i16_]]
+    %2:fpr(<8 x s16>) = COPY $q1
+    %1:fpr(<8 x s16>) = COPY $q0
+    %0:fpr(<8 x s16>) = G_SUB %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_XOR on <8 x s8>: the CHECK lines expect EORv8i8 on fpr64.
+---
+name: test_rule1329_id1170_at_idx86118
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1329_id1170_at_idx86118
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[EORv8i8_:%[0-9]+]]:fpr64 = EORv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[EORv8i8_]]
+    %2:fpr(<8 x s8>) = COPY $d1
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s8>) = G_XOR %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s8>)
+
+...
+# G_XOR on <16 x s8>: the CHECK lines expect EORv16i8 on fpr128.
+---
+name: test_rule1330_id1171_at_idx86161
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1330_id1171_at_idx86161
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[EORv16i8_:%[0-9]+]]:fpr128 = EORv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[EORv16i8_]]
+    %2:fpr(<16 x s8>) = COPY $q1
+    %1:fpr(<16 x s8>) = COPY $q0
+    %0:fpr(<16 x s8>) = G_XOR %1, %2
+    $noreg = PATCHABLE_RET %0(<16 x s8>)
+
+...
+# G_XOR on <4 x s16>: the CHECK lines expect the byte-vector opcode EORv8i8
+# on fpr64, the same opcode expected for the <8 x s8> case.
+---
+name: test_rule1331_id1791_at_idx86204
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+  - { reg: '$d1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: test_rule1331_id1791_at_idx86204
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[EORv8i8_:%[0-9]+]]:fpr64 = EORv8i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[EORv8i8_]]
+    %2:fpr(<4 x s16>) = COPY $d1
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s16>) = G_XOR %1, %2
+    $noreg = PATCHABLE_RET %0(<4 x s16>)
+
+...
+# G_XOR on <8 x s16>: the CHECK lines expect the byte-vector opcode EORv16i8
+# on fpr128, the same opcode expected for the <16 x s8> case.
+---
+name: test_rule1334_id1794_at_idx86333
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+  - { reg: '$q1', virtual-reg: '%2' }
+body: |
+  bb.0.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_rule1334_id1794_at_idx86333
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[EORv16i8_:%[0-9]+]]:fpr128 = EORv16i8 [[COPY1]], [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[EORv16i8_]]
+    %2:fpr(<8 x s16>) = COPY $q1
+    %1:fpr(<8 x s16>) = COPY $q0
+    %0:fpr(<8 x s16>) = G_XOR %1, %2
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_ANYEXT from <8 x s8> to <8 x s16>: the CHECK lines expect USHLLv8i8_shift
+# with an immediate of 0, producing an fpr128 result from an fpr64 source.
+---
+name: test_rule1337_id2925_at_idx86462
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_rule1337_id2925_at_idx86462
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[USHLLv8i8_shift:%[0-9]+]]:fpr128 = USHLLv8i8_shift [[COPY]], 0
+    ; CHECK: $noreg = PATCHABLE_RET [[USHLLv8i8_shift]]
+    %1:fpr(<8 x s8>) = COPY $d0
+    %0:fpr(<8 x s16>) = G_ANYEXT %1(<8 x s8>)
+    $noreg = PATCHABLE_RET %0(<8 x s16>)
+
+...
+# G_ANYEXT from <4 x s16> to <4 x s32>: the CHECK lines expect
+# USHLLv4i16_shift with an immediate of 0 (fpr64 source, fpr128 result).
+---
+name: test_rule1338_id2928_at_idx86507
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_rule1338_id2928_at_idx86507
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[USHLLv4i16_shift:%[0-9]+]]:fpr128 = USHLLv4i16_shift [[COPY]], 0
+    ; CHECK: $noreg = PATCHABLE_RET [[USHLLv4i16_shift]]
+    %1:fpr(<4 x s16>) = COPY $d0
+    %0:fpr(<4 x s32>) = G_ANYEXT %1(<4 x s16>)
+    $noreg = PATCHABLE_RET %0(<4 x s32>)
+
+...
+# G_ANYEXT from <2 x s32> to <2 x s64>: the CHECK lines expect
+# USHLLv2i32_shift with an immediate of 0 (fpr64 source, fpr128 result).
+---
+name: test_rule1339_id2931_at_idx86552
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_rule1339_id2931_at_idx86552
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[USHLLv2i32_shift:%[0-9]+]]:fpr128 = USHLLv2i32_shift [[COPY]], 0
+    ; CHECK: $noreg = PATCHABLE_RET [[USHLLv2i32_shift]]
+    %1:fpr(<2 x s32>) = COPY $d0
+    %0:fpr(<2 x s64>) = G_ANYEXT %1(<2 x s32>)
+    $noreg = PATCHABLE_RET %0(<2 x s64>)
+
+...
+# Scalar G_FNEG on s32: the CHECK lines expect FNEGSr on fpr32.
+---
+name: test_rule1582_id372_at_idx97075
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$s0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: test_rule1582_id372_at_idx97075
+    ; CHECK: liveins: $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[FNEGSr:%[0-9]+]]:fpr32 = FNEGSr [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FNEGSr]]
+    %1:fpr(s32) = COPY $s0
+    %0:fpr(s32) = G_FNEG %1
+    $noreg = PATCHABLE_RET %0(s32)
+
+...
+# Scalar G_FNEG on s64: the CHECK lines expect FNEGDr on fpr64.
+---
+name: test_rule1583_id373_at_idx97110
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_rule1583_id373_at_idx97110
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FNEGDr]]
+    %1:fpr(s64) = COPY $d0
+    %0:fpr(s64) = G_FNEG %1
+    $noreg = PATCHABLE_RET %0(s64)
+
+...
+# G_FNEG on <2 x s32>: the CHECK lines expect FNEGv2f32 on fpr64.
+---
+name: test_rule1586_id597_at_idx97215
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$d0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_rule1586_id597_at_idx97215
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FNEGv2f32_:%[0-9]+]]:fpr64 = FNEGv2f32 [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FNEGv2f32_]]
+    %1:fpr(<2 x s32>) = COPY $d0
+    %0:fpr(<2 x s32>) = G_FNEG %1
+    $noreg = PATCHABLE_RET %0(<2 x s32>)
+
+...
+# G_FNEG on <4 x s32>: the CHECK lines expect FNEGv4f32 on fpr128.
+---
+name: test_rule1587_id598_at_idx97250
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr }
+  - { id: 1, class: fpr }
+liveins:
+  - { reg: '$q0', virtual-reg: '%1' }
+body: |
+  bb.0.entry:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_rule1587_id598_at_idx97250
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FNEGv4f32_:%[0-9]+]]:fpr128 = FNEGv4f32 [[COPY]]
+    ; CHECK: $noreg = PATCHABLE_RET [[FNEGv4f32_]]
+    %1:fpr(<4 x s32>) = COPY $q0
+    %0:fpr(<4 x s32>) = G_FNEG %1
+    $noreg = PATCHABLE_RET %0(<4 x s32>)
+
+...
+--- +name: test_rule1588_id599_at_idx97285 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1588_id599_at_idx97285 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FNEGv2f64_:%[0-9]+]]:fpr128 = FNEGv2f64 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FNEGv2f64_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_FNEG %1 + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... +--- +name: test_rule1592_id2383_at_idx97425 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1592_id2383_at_idx97425 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = FCVTLv2i32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTLv2i32_]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s64>) = G_FPEXT %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... +--- +name: test_rule1593_id2385_at_idx97458 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1593_id2385_at_idx97458 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FCVTLv4i16_:%[0-9]+]]:fpr128 = FCVTLv4i16 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTLv4i16_]] + %1:fpr(<4 x s16>) = COPY $d0 + %0:fpr(<4 x s32>) = G_FPEXT %1(<4 x s16>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... 
+--- +name: test_rule1602_id587_at_idx97771 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1602_id587_at_idx97771 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FCVTZSv2f32_:%[0-9]+]]:fpr64 = FCVTZSv2f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTZSv2f32_]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_FPTOSI %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... +--- +name: test_rule1603_id588_at_idx97806 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1603_id588_at_idx97806 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FCVTZSv4f32_:%[0-9]+]]:fpr128 = FCVTZSv4f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTZSv4f32_]] + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_FPTOSI %1(<4 x s32>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule1604_id589_at_idx97841 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1604_id589_at_idx97841 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = FCVTZSv2f64 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTZSv2f64_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_FPTOSI %1(<2 x s64>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... 
+--- +name: test_rule1613_id592_at_idx98156 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1613_id592_at_idx98156 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FCVTZUv2f32_:%[0-9]+]]:fpr64 = FCVTZUv2f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTZUv2f32_]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_FPTOUI %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... +--- +name: test_rule1614_id593_at_idx98191 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1614_id593_at_idx98191 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FCVTZUv4f32_:%[0-9]+]]:fpr128 = FCVTZUv4f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTZUv4f32_]] + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_FPTOUI %1(<4 x s32>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule1615_id594_at_idx98226 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1615_id594_at_idx98226 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = FCVTZUv2f64 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTZUv2f64_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_FPTOUI %1(<2 x s64>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... 
+--- +name: test_rule1619_id2389_at_idx98366 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1619_id2389_at_idx98366 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FCVTNv2i32_:%[0-9]+]]:fpr64 = FCVTNv2i32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTNv2i32_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s32>) = G_FPTRUNC %1(<2 x s64>) + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... +--- +name: test_rule1620_id2390_at_idx98399 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1620_id2390_at_idx98399 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FCVTNv4i16_:%[0-9]+]]:fpr64 = FCVTNv4i16 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[FCVTNv4i16_]] + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s16>) = G_FPTRUNC %1(<4 x s32>) + $noreg = PATCHABLE_RET %0(<4 x s16>) + +... +--- +name: test_rule1621_id2923_at_idx98432 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1621_id2923_at_idx98432 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SSHLLv8i8_shift:%[0-9]+]]:fpr128 = SSHLLv8i8_shift [[COPY]], 0 + ; CHECK: $noreg = PATCHABLE_RET [[SSHLLv8i8_shift]] + %1:fpr(<8 x s8>) = COPY $d0 + %0:fpr(<8 x s16>) = G_SEXT %1(<8 x s8>) + $noreg = PATCHABLE_RET %0(<8 x s16>) + +... 
+--- +name: test_rule1622_id2926_at_idx98477 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1622_id2926_at_idx98477 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SSHLLv4i16_shift:%[0-9]+]]:fpr128 = SSHLLv4i16_shift [[COPY]], 0 + ; CHECK: $noreg = PATCHABLE_RET [[SSHLLv4i16_shift]] + %1:fpr(<4 x s16>) = COPY $d0 + %0:fpr(<4 x s32>) = G_SEXT %1(<4 x s16>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule1623_id2929_at_idx98522 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1623_id2929_at_idx98522 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SSHLLv2i32_shift:%[0-9]+]]:fpr128 = SSHLLv2i32_shift [[COPY]], 0 + ; CHECK: $noreg = PATCHABLE_RET [[SSHLLv2i32_shift]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s64>) = G_SEXT %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... +--- +name: test_rule1632_id687_at_idx98847 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1632_id687_at_idx98847 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SCVTFv2f32_:%[0-9]+]]:fpr64 = SCVTFv2f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SCVTFv2f32_]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_SITOFP %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... 
+--- +name: test_rule1633_id688_at_idx98882 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1633_id688_at_idx98882 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SCVTFv4f32_:%[0-9]+]]:fpr128 = SCVTFv4f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SCVTFv4f32_]] + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_SITOFP %1(<4 x s32>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule1634_id689_at_idx98917 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1634_id689_at_idx98917 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[SCVTFv2f64_:%[0-9]+]]:fpr128 = SCVTFv2f64 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[SCVTFv2f64_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_SITOFP %1(<2 x s64>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... +--- +name: test_rule1635_id748_at_idx98952 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1635_id748_at_idx98952 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[XTNv8i8_]] + %1:fpr(<8 x s16>) = COPY $q0 + %0:fpr(<8 x s8>) = G_TRUNC %1(<8 x s16>) + $noreg = PATCHABLE_RET %0(<8 x s8>) + +... 
+--- +name: test_rule1636_id749_at_idx98987 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1636_id749_at_idx98987 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[XTNv4i16_]] + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s16>) = G_TRUNC %1(<4 x s32>) + $noreg = PATCHABLE_RET %0(<4 x s16>) + +... +--- +name: test_rule1637_id750_at_idx99022 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1637_id750_at_idx99022 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[XTNv2i32_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s32>) = G_TRUNC %1(<2 x s64>) + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... +--- +name: test_rule1647_id731_at_idx99386 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1647_id731_at_idx99386 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[UCVTFv2f32_:%[0-9]+]]:fpr64 = UCVTFv2f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UCVTFv2f32_]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s32>) = G_UITOFP %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s32>) + +... 
+--- +name: test_rule1648_id732_at_idx99421 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1648_id732_at_idx99421 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = UCVTFv4f32 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UCVTFv4f32_]] + %1:fpr(<4 x s32>) = COPY $q0 + %0:fpr(<4 x s32>) = G_UITOFP %1(<4 x s32>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule1649_id733_at_idx99456 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$q0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $q0 + + ; CHECK-LABEL: name: test_rule1649_id733_at_idx99456 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[UCVTFv2f64_:%[0-9]+]]:fpr128 = UCVTFv2f64 [[COPY]] + ; CHECK: $noreg = PATCHABLE_RET [[UCVTFv2f64_]] + %1:fpr(<2 x s64>) = COPY $q0 + %0:fpr(<2 x s64>) = G_UITOFP %1(<2 x s64>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... +--- +name: test_rule1650_id2924_at_idx99491 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1650_id2924_at_idx99491 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[USHLLv8i8_shift:%[0-9]+]]:fpr128 = USHLLv8i8_shift [[COPY]], 0 + ; CHECK: $noreg = PATCHABLE_RET [[USHLLv8i8_shift]] + %1:fpr(<8 x s8>) = COPY $d0 + %0:fpr(<8 x s16>) = G_ZEXT %1(<8 x s8>) + $noreg = PATCHABLE_RET %0(<8 x s16>) + +... 
+--- +name: test_rule1651_id2927_at_idx99536 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1651_id2927_at_idx99536 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[USHLLv4i16_shift:%[0-9]+]]:fpr128 = USHLLv4i16_shift [[COPY]], 0 + ; CHECK: $noreg = PATCHABLE_RET [[USHLLv4i16_shift]] + %1:fpr(<4 x s16>) = COPY $d0 + %0:fpr(<4 x s32>) = G_ZEXT %1(<4 x s16>) + $noreg = PATCHABLE_RET %0(<4 x s32>) + +... +--- +name: test_rule1652_id2930_at_idx99581 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +liveins: + - { reg: '$d0', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $d0 + + ; CHECK-LABEL: name: test_rule1652_id2930_at_idx99581 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[USHLLv2i32_shift:%[0-9]+]]:fpr128 = USHLLv2i32_shift [[COPY]], 0 + ; CHECK: $noreg = PATCHABLE_RET [[USHLLv2i32_shift]] + %1:fpr(<2 x s32>) = COPY $d0 + %0:fpr(<2 x s64>) = G_ZEXT %1(<2 x s32>) + $noreg = PATCHABLE_RET %0(<2 x s64>) + +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/test/CodeGen/AArch64/GlobalISel/select-xor.mir index 8f0b0dccca6e..cc75386271c8 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-xor.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -26,17 +26,17 @@ registers: body: | bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 ; CHECK-LABEL: name: xor_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[COPY]], [[COPY1]] - ; CHECK: %w0 = COPY [[EORWrr]] - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 + ; CHECK: $w0 = COPY [[EORWrr]] + %0(s32) = COPY $w0 + %1(s32) = COPY $w1 %2(s32) = G_XOR %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -52,17 +52,17 @@ registers: body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 ; CHECK-LABEL: name: xor_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[EORXrr:%[0-9]+]]:gpr64 = EORXrr [[COPY]], [[COPY1]] - ; CHECK: %x0 = COPY [[EORXrr]] - %0(s64) = COPY %x0 - %1(s64) = COPY %x1 + ; CHECK: $x0 = COPY [[EORXrr]] + %0(s64) = COPY $x0 + %1(s64) = COPY $x1 %2(s64) = G_XOR %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... 
--- @@ -79,16 +79,16 @@ registers: body: | bb.0: - liveins: %w0 + liveins: $w0 ; CHECK-LABEL: name: xor_constant_n1_s32_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr %wzr, [[COPY]] - ; CHECK: %w0 = COPY [[ORNWrr]] - %0(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[COPY]] + ; CHECK: $w0 = COPY [[ORNWrr]] + %0(s32) = COPY $w0 %1(s32) = G_CONSTANT i32 -1 %2(s32) = G_XOR %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... --- @@ -104,16 +104,16 @@ registers: body: | bb.0: - liveins: %x0 + liveins: $x0 ; CHECK-LABEL: name: xor_constant_n1_s64_gpr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 - ; CHECK: [[ORNXrr:%[0-9]+]]:gpr64 = ORNXrr %xzr, [[COPY]] - ; CHECK: %x0 = COPY [[ORNXrr]] - %0(s64) = COPY %x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[ORNXrr:%[0-9]+]]:gpr64 = ORNXrr $xzr, [[COPY]] + ; CHECK: $x0 = COPY [[ORNXrr]] + %0(s64) = COPY $x0 %1(s64) = G_CONSTANT i64 -1 %2(s64) = G_XOR %0, %1 - %x0 = COPY %2(s64) + $x0 = COPY %2(s64) ... --- @@ -134,16 +134,16 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: B %bb.1 ; CHECK: bb.1: - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr %wzr, [[COPY]] - ; CHECK: %w0 = COPY [[ORNWrr]] + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[COPY]] + ; CHECK: $w0 = COPY [[ORNWrr]] bb.0: - liveins: %w0, %w1 + liveins: $w0, $w1 successors: %bb.1 %1(s32) = G_CONSTANT i32 -1 G_BR %bb.1 bb.1: - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %2(s32) = G_XOR %0, %1 - %w0 = COPY %2(s32) + $w0 = COPY %2(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-zextload.mir b/test/CodeGen/AArch64/GlobalISel/select-zextload.mir new file mode 100644 index 000000000000..8d38ac6d3175 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-zextload.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @zextload_s32_from_s16(i16 *%addr) { ret void } + define void @zextload_s32_from_s16_not_combined(i16 *%addr) { ret void } +... + +--- +name: zextload_s32_from_s16 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: $w0 = COPY [[T0]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s32) = G_ZEXTLOAD %0 :: (load 2 from %ir.addr) + $w0 = COPY %1(s32) +... +--- +name: zextload_s32_from_s16_not_combined +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: zextload_s32_from_s16_not_combined + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T1:%[0-9]+]]:gpr32 = UBFMWri [[T0]], 0, 15 + ; CHECK: $w0 = COPY [[T1]] + %0:gpr(p0) = COPY $x0 + %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %2:gpr(s32) = G_ZEXT %1 + $w0 = COPY %2(s32) +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select.mir b/test/CodeGen/AArch64/GlobalISel/select.mir index c13b27adbb18..ae7188fb02fd 100644 --- a/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/test/CodeGen/AArch64/GlobalISel/select.mir @@ -1,6 +1,6 @@ -# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=IOS -# RUN: llc -O0 -mtriple=aarch64-linux-gnu -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-DEFAULT -# RUN: llc -O0 -mtriple=aarch64-linux-gnu -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-PIC +# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=IOS +# RUN: llc -O0 -mtriple=aarch64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-DEFAULT +# RUN: llc -O0 -mtriple=aarch64-linux-gnu -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-PIC --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -47,7 +47,7 @@ stack: body: | bb.0: %0(p0) = G_FRAME_INDEX %stack.0.ptr0 - %x0 = COPY %0(p0) + $x0 = COPY %0(p0) ... --- @@ -65,11 +65,11 @@ registers: # CHECK: %2:gpr64 = ADDXrr %0, %1 body: | bb.0: - liveins: %x0 - %0(p0) = COPY %x0 + liveins: $x0 + %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 42 %2(p0) = G_GEP %0, %1(s64) - %x0 = COPY %2(p0) + $x0 = COPY %2(p0) ... --- @@ -82,10 +82,10 @@ regBankSelected: true # CHECK: %1:gpr64sp = ANDXri %0, 8060 body: | bb.0: - liveins: %x0 - %0:gpr(p0) = COPY %x0 + liveins: $x0 + %0:gpr(p0) = COPY $x0 %1:gpr(p0) = G_PTR_MASK %0, 3 - %x0 = COPY %1(p0) + $x0 = COPY %1(p0) ... 
--- @@ -104,7 +104,7 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @var_local - %x0 = COPY %0(p0) + $x0 = COPY %0(p0) ... --- @@ -122,7 +122,7 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @var_got - %x0 = COPY %0(p0) + $x0 = COPY %0(p0) ... --- @@ -153,36 +153,36 @@ registers: - { id: 11, class: gpr } # CHECK: body: -# CHECK: %wzr = SUBSWrr %0, %0, implicit-def %nzcv -# CHECK: %1:gpr32 = CSINCWr %wzr, %wzr, 1, implicit %nzcv +# CHECK: $wzr = SUBSWrr %0, %0, implicit-def $nzcv +# CHECK: %1:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv -# CHECK: %xzr = SUBSXrr %2, %2, implicit-def %nzcv -# CHECK: %3:gpr32 = CSINCWr %wzr, %wzr, 3, implicit %nzcv +# CHECK: $xzr = SUBSXrr %2, %2, implicit-def $nzcv +# CHECK: %3:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv -# CHECK: %xzr = SUBSXrr %4, %4, implicit-def %nzcv -# CHECK: %5:gpr32 = CSINCWr %wzr, %wzr, 0, implicit %nzcv +# CHECK: $xzr = SUBSXrr %4, %4, implicit-def $nzcv +# CHECK: %5:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv body: | bb.0: - liveins: %w0, %x0 + liveins: $w0, $x0 - %0(s32) = COPY %w0 + %0(s32) = COPY $w0 %1(s32) = G_ICMP intpred(eq), %0, %0 %6(s1) = G_TRUNC %1(s32) %9(s32) = G_ANYEXT %6 - %w0 = COPY %9(s32) + $w0 = COPY %9(s32) - %2(s64) = COPY %x0 + %2(s64) = COPY $x0 %3(s32) = G_ICMP intpred(uge), %2, %2 %7(s1) = G_TRUNC %3(s32) %10(s32) = G_ANYEXT %7 - %w0 = COPY %10(s32) + $w0 = COPY %10(s32) - %4(p0) = COPY %x0 + %4(p0) = COPY $x0 %5(s32) = G_ICMP intpred(ne), %4, %4 %8(s1) = G_TRUNC %5(s32) %11(s32) = G_ANYEXT %8 - %w0 = COPY %11(s32) + $w0 = COPY %11(s32) ... 
--- @@ -209,29 +209,29 @@ registers: - { id: 7, class: gpr } # CHECK: body: -# CHECK: FCMPSrr %0, %0, implicit-def %nzcv -# CHECK: [[TST_MI:%[0-9]+]]:gpr32 = CSINCWr %wzr, %wzr, 5, implicit %nzcv -# CHECK: [[TST_GT:%[0-9]+]]:gpr32 = CSINCWr %wzr, %wzr, 13, implicit %nzcv +# CHECK: FCMPSrr %0, %0, implicit-def $nzcv +# CHECK: [[TST_MI:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv +# CHECK: [[TST_GT:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv # CHECK: %1:gpr32 = ORRWrr [[TST_MI]], [[TST_GT]] -# CHECK: FCMPDrr %2, %2, implicit-def %nzcv -# CHECK: %3:gpr32 = CSINCWr %wzr, %wzr, 4, implicit %nzcv +# CHECK: FCMPDrr %2, %2, implicit-def $nzcv +# CHECK: %3:gpr32 = CSINCWr $wzr, $wzr, 4, implicit $nzcv body: | bb.0: - liveins: %w0, %x0 + liveins: $w0, $x0 - %0(s32) = COPY %s0 + %0(s32) = COPY $s0 %1(s32) = G_FCMP floatpred(one), %0, %0 %4(s1) = G_TRUNC %1(s32) %6(s32) = G_ANYEXT %4 - %w0 = COPY %6(s32) + $w0 = COPY %6(s32) - %2(s64) = COPY %d0 + %2(s64) = COPY $d0 %3(s32) = G_FCMP floatpred(uge), %2, %2 %5(s1) = G_TRUNC %3(s32) %7(s32) = G_ANYEXT %5 - %w0 = COPY %7(s32) + $w0 = COPY %7(s32) ... @@ -257,10 +257,10 @@ registers: body: | bb.0: - liveins: %s0, %w0 + liveins: $s0, $w0 successors: %bb.1 - %0(s32) = COPY %s0 - %3:gpr(s32) = COPY %w0 + %0(s32) = COPY $s0 + %3:gpr(s32) = COPY $w0 %1(s1) = G_TRUNC %3 bb.1: @@ -269,8 +269,8 @@ body: | G_BRCOND %1, %bb.1 bb.2: - %s0 = COPY %2 - RET_ReallyLR implicit %s0 + $s0 = COPY %2 + RET_ReallyLR implicit $s0 ... 
--- @@ -304,30 +304,30 @@ registers: - { id: 9, class: gpr } # CHECK: body: -# CHECK: %wzr = ANDSWri %0, 0, implicit-def %nzcv -# CHECK: %3:gpr32 = CSELWr %1, %2, 1, implicit %nzcv -# CHECK: %wzr = ANDSWri %0, 0, implicit-def %nzcv -# CHECK: %6:gpr64 = CSELXr %4, %5, 1, implicit %nzcv -# CHECK: %wzr = ANDSWri %0, 0, implicit-def %nzcv -# CHECK: %9:gpr64 = CSELXr %7, %8, 1, implicit %nzcv +# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv +# CHECK: %3:gpr32 = CSELWr %1, %2, 1, implicit $nzcv +# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv +# CHECK: %6:gpr64 = CSELXr %4, %5, 1, implicit $nzcv +# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv +# CHECK: %9:gpr64 = CSELXr %7, %8, 1, implicit $nzcv body: | bb.0: - liveins: %w0, %w1, %w2 - %10:gpr(s32) = COPY %w0 + liveins: $w0, $w1, $w2 + %10:gpr(s32) = COPY $w0 %0(s1) = G_TRUNC %10 - %1(s32) = COPY %w1 - %2(s32) = COPY %w2 + %1(s32) = COPY $w1 + %2(s32) = COPY $w2 %3(s32) = G_SELECT %0, %1, %2 - %w0 = COPY %3(s32) + $w0 = COPY %3(s32) - %4(s64) = COPY %x0 - %5(s64) = COPY %x1 + %4(s64) = COPY $x0 + %5(s64) = COPY $x1 %6(s64) = G_SELECT %0, %4, %5 - %x0 = COPY %6(s64) + $x0 = COPY %6(s64) - %7(p0) = COPY %x0 - %8(p0) = COPY %x1 + %7(p0) = COPY $x0 + %8(p0) = COPY $x1 %9(p0) = G_SELECT %0, %7, %8 - %x0 = COPY %9(p0) + $x0 = COPY %9(p0) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll b/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll new file mode 100644 index 000000000000..e86f9ff3ecdb --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple aarch64 -O0 -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s + +%dag = type { { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } }, { { i8, { i8 } }, { i8 } } } + +define void @test_const(%dag* %dst) { + ; CHECK-LABEL: name: test_const + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 10 + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20 + ; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50 + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) + ; CHECK: STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) + ; CHECK: STRBBui 
[[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7) + ; CHECK: RET_ReallyLR +entry: + %updated = insertvalue + ; Check that we're visiting constants with shared parts + ; (deduplicated via LLVMContext, forming a proper DAG) correctly: + %dag { + { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 50 } + }, + 0, + 1 + store %dag %updated, %dag* %dst + ; 10, 20, 10, 20, 50, 10, 20, 20 sequence is expected + + store + ; Check that we didn't overwrite a previously seen constant + ; while processing an insertvalue into it: + %dag { + { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + { { i8, { i8 } }, { i8 } } { + { i8, { i8 } } { + i8 10, + { i8 } { i8 20 } + }, + { i8 } { i8 20 } + } + }, + %dag* %dst + ; 10, 20, 10, 20, 20, 10, 20, 20 sequence is expected + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll index 4b69575079a3..a306cd02dd49 100644 --- a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll +++ b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll @@ -3,16 +3,26 @@ %type = type [4 x {i8, i32}] +define i8* @translate_element_size1(i64 %arg) { +; CHECK-LABEL: name: translate_element_size1 +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[BASE:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZERO]] +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP 
[[BASE]], [[OFFSET]] + %tmp = getelementptr i8, i8* null, i64 %arg + ret i8* %tmp +} + define %type* @first_offset_const(%type* %addr) { ; CHECK-LABEL: name: first_offset_const ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: %x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) - ; CHECK: %x0 = COPY [[GEP]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[GEP]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %res = getelementptr %type, %type* %addr, i32 1 ret %type* %res } @@ -21,11 +31,11 @@ define %type* @first_offset_trivial(%type* %addr) { ; CHECK-LABEL: name: first_offset_trivial ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: %x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; CHECK: %x0 = COPY [[COPY1]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[COPY1]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %res = getelementptr %type, %type* %addr, i32 0 ret %type* %res } @@ -34,15 +44,15 @@ define %type* @first_offset_variable(%type* %addr, i64 %idx) { ; CHECK-LABEL: name: first_offset_variable ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: %x0, %x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[COPY1]] ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) - ; CHECK: %x0 = COPY [[COPY2]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %res = getelementptr %type, 
%type* %addr, i64 %idx ret %type* %res } @@ -51,16 +61,16 @@ define %type* @first_offset_ext(%type* %addr, i32 %idx) { ; CHECK-LABEL: name: first_offset_ext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: %w1, %x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %w1 + ; CHECK: liveins: $w1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[SEXT]] ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) - ; CHECK: %x0 = COPY [[COPY2]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %res = getelementptr %type, %type* %addr, i32 %idx ret %type* %res } @@ -70,17 +80,17 @@ define i32* @const_then_var(%type1* %addr, i64 %idx) { ; CHECK-LABEL: name: const_then_var ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: %x0, %x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 272 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C1]], [[COPY1]] ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[GEP]], [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP1]](p0) - ; CHECK: %x0 = COPY [[COPY2]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %res = getelementptr %type1, %type1* %addr, i32 4, i32 1, i64 %idx ret i32* %res } @@ -89,16 +99,16 @@ define i32* @var_then_const(%type1* %addr, i64 %idx) { ; CHECK-LABEL: name: var_then_const ; CHECK: bb.1 
(%ir-block.0): - ; CHECK: liveins: %x0, %x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[COPY1]] ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[GEP]], [[C1]](s64) - ; CHECK: %x0 = COPY [[GEP1]](p0) - ; CHECK: RET_ReallyLR implicit %x0 + ; CHECK: $x0 = COPY [[GEP1]](p0) + ; CHECK: RET_ReallyLR implicit $x0 %res = getelementptr %type1, %type1* %addr, i64 %idx, i32 2, i32 2 ret i32* %res } diff --git a/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll b/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll new file mode 100644 index 000000000000..34b2a5626cc2 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll @@ -0,0 +1,10 @@ +; RUN: llc -O0 -mtriple=arm64 < %s + +declare i8* @llvm.launder.invariant.group(i8*) + +define i8* @barrier(i8* %p) { +; CHECK: bl llvm.launder.invariant.group + %q = call i8* @llvm.launder.invariant.group(i8* %p) + ret i8* %q +} + diff --git a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll index f92a5721a4ee..770985a530c4 100644 --- a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll @@ -6,7 +6,7 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64, ; CHECK: fixedStack: ; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], type: default, offset: 8 ; CHECK: body: -; CHECK: [[LIST:%[0-9]+]]:gpr64sp = COPY %x0 +; CHECK: [[LIST:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[VARARGS_AREA:%[0-9]+]]:gpr64common = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0 ; CHECK: STRXui [[VARARGS_AREA]], [[LIST]], 0 :: 
(store 8 into %ir.list, align 0) call void @llvm.va_start(i8* %list) diff --git a/test/CodeGen/AArch64/GlobalISel/vastart.ll b/test/CodeGen/AArch64/GlobalISel/vastart.ll index 1fb3eb55e677..9e83f030a058 100644 --- a/test/CodeGen/AArch64/GlobalISel/vastart.ll +++ b/test/CodeGen/AArch64/GlobalISel/vastart.ll @@ -5,7 +5,7 @@ declare void @llvm.va_start(i8*) define void @test_va_start(i8* %list) { ; CHECK-LABEL: name: test_va_start -; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-IOS: G_VASTART [[LIST]](p0) :: (store 8 into %ir.list, align 0) ; CHECK-LINUX: G_VASTART [[LIST]](p0) :: (store 32 into %ir.list, align 0) call void @llvm.va_start(i8* %list) diff --git a/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir b/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir index 94a9134072a3..c5586932b527 100644 --- a/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir +++ b/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir @@ -17,6 +17,6 @@ registers: - { id: 0, class: _ } body: | bb.0: - liveins: %x0 - %0(s64) = COPY %x0 + liveins: $x0 + %0(s64) = COPY $x0 ... diff --git a/test/CodeGen/AArch64/GlobalISel/verify-selected.mir b/test/CodeGen/AArch64/GlobalISel/verify-selected.mir index 772233ec1038..ad17fa930151 100644 --- a/test/CodeGen/AArch64/GlobalISel/verify-selected.mir +++ b/test/CodeGen/AArch64/GlobalISel/verify-selected.mir @@ -18,8 +18,8 @@ registers: - { id: 2, class: gpr } body: | bb.0: - liveins: %x0 - %0 = COPY %x0 + liveins: $x0 + %0 = COPY $x0 ; CHECK: *** Bad machine code: Unexpected generic instruction in a Selected function *** ; CHECK: instruction: %1:gpr64 = G_ADD @@ -28,5 +28,5 @@ body: | ; CHECK: *** Bad machine code: Generic virtual register invalid in a Selected function *** ; CHECK: instruction: %2:gpr(s64) = COPY ; CHECK: operand 0: %2 - %2(s64) = COPY %x0 + %2(s64) = COPY $x0 ... 
diff --git a/test/CodeGen/AArch64/O0-pipeline.ll b/test/CodeGen/AArch64/O0-pipeline.ll new file mode 100644 index 000000000000..dd0d08e68e94 --- /dev/null +++ b/test/CodeGen/AArch64/O0-pipeline.ll @@ -0,0 +1,67 @@ +; RUN: llc -mtriple=arm64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v "Verify generated machine code" | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Target Pass Configuration +; CHECK-NEXT: Machine Module Information +; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Type-Based Alias Analysis +; CHECK-NEXT: Scoped NoAlias Alias Analysis +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Machine Branch Probability Analysis +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Pre-ISel Intrinsic Lowering +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Expand Atomic instructions +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Lower Garbage Collection Instructions +; CHECK-NEXT: Shadow Stack GC Lowering +; CHECK-NEXT: Remove unreachable blocks from the CFG +; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (post inlining) +; CHECK-NEXT: Scalarize Masked Memory Intrinsics +; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: Rewrite Symbols +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Safe Stack instrumentation pass +; CHECK-NEXT: Insert stack protectors +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: IRTranslator +; CHECK-NEXT: Legalizer +; CHECK-NEXT: RegBankSelect +; CHECK-NEXT: Localizer +; CHECK-NEXT: InstructionSelect +; CHECK-NEXT: ResetMachineFunction +; CHECK-NEXT: AArch64 Instruction Selection +; CHECK-NEXT: Expand ISel Pseudo-instructions +; CHECK-NEXT: Local Stack Slot Allocation +; CHECK-NEXT: Eliminate PHI nodes for register allocation +; CHECK-NEXT: Two-Address instruction pass +; CHECK-NEXT: Fast Register Allocator +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; CHECK-NEXT: Post-RA pseudo instruction expansion pass +; CHECK-NEXT: AArch64 pseudo instruction expansion pass +; CHECK-NEXT: Analyze Machine Code For Garbage Collection +; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: StackMap Liveness Analysis +; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: Insert fentry calls +; CHECK-NEXT: Insert XRay ops +; CHECK-NEXT: Implement the 'patchable-function' attribute +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: AArch64 Assembly Printer +; CHECK-NEXT: Free MachineFunction + +define void @f() { + ret void +} diff --git a/test/CodeGen/AArch64/O3-pipeline.ll b/test/CodeGen/AArch64/O3-pipeline.ll new file mode 100644 index 000000000000..e482682fc9d9 --- /dev/null +++ b/test/CodeGen/AArch64/O3-pipeline.ll @@ -0,0 +1,167 @@ +; RUN: llc -mtriple=arm64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v 
"Verify generated machine code" | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Target Pass Configuration +; CHECK-NEXT: Machine Module Information +; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: Type-Based Alias Analysis +; CHECK-NEXT: Scoped NoAlias Alias Analysis +; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info +; CHECK-NEXT: Machine Branch Probability Analysis +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Pre-ISel Intrinsic Lowering +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Expand Atomic instructions +; CHECK-NEXT: Simplify the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Data Prefetch +; CHECK-NEXT: Falkor HW Prefetch Fix +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Induction Variable Users +; CHECK-NEXT: Loop Strength Reduction +; CHECK-NEXT: Merge contiguous icmps into a memcmp +; CHECK-NEXT: Expand memcmp() to load/stores +; CHECK-NEXT: Lower Garbage Collection Instructions +; CHECK-NEXT: Shadow Stack GC Lowering +; CHECK-NEXT: Remove unreachable blocks from the CFG +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Constant Hoisting +; CHECK-NEXT: Partially inline calls to library functions +; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (post inlining) +; CHECK-NEXT: Scalarize Masked Memory Intrinsics +; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Interleaved Access Pass +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: CodeGen Prepare +; CHECK-NEXT: Rewrite Symbols +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: AArch64 Promote Constant +; CHECK-NEXT: Unnamed pass: implement Pass::getPassName() +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Merge internal globals +; CHECK-NEXT: Safe Stack instrumentation pass +; CHECK-NEXT: Insert stack protectors +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: AArch64 Instruction Selection +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: AArch64 Local Dynamic TLS Access Clean-up +; CHECK-NEXT: Expand ISel Pseudo-instructions +; CHECK-NEXT: Early Tail Duplication +; CHECK-NEXT: Optimize machine instruction PHIs +; CHECK-NEXT: Slot index numbering +; CHECK-NEXT: Merge disjoint stack slots +; CHECK-NEXT: Local Stack Slot Allocation +; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: AArch64 Condition Optimizer +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Machine Trace Metrics +; CHECK-NEXT: AArch64 Conditional Compares +; CHECK-NEXT: Machine InstCombiner +; CHECK-NEXT: AArch64 Conditional Branch Tuning +; CHECK-NEXT: Machine Trace Metrics +; CHECK-NEXT: Early If-Conversion +; CHECK-NEXT: AArch64 Store Pair Suppression +; CHECK-NEXT: AArch64 SIMD instructions optimization pass +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: 
Early Machine Loop Invariant Code Motion +; CHECK-NEXT: Machine Common Subexpression Elimination +; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: Machine code sinking +; CHECK-NEXT: Peephole Optimizations +; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: AArch64 Dead register definitions +; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Process Implicit Definitions +; CHECK-NEXT: Remove unreachable machine basic blocks +; CHECK-NEXT: Live Variable Analysis +; CHECK-NEXT: Eliminate PHI nodes for register allocation +; CHECK-NEXT: Two-Address instruction pass +; CHECK-NEXT: Slot index numbering +; CHECK-NEXT: Live Interval Analysis +; CHECK-NEXT: Simple Register Coalescing +; CHECK-NEXT: Rename Disconnected Subregister Components +; CHECK-NEXT: Machine Instruction Scheduler +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: Debug Variable Analysis +; CHECK-NEXT: Live Stack Slot Analysis +; CHECK-NEXT: Virtual Register Map +; CHECK-NEXT: Live Register Matrix +; CHECK-NEXT: Bundle Machine CFG Edges +; CHECK-NEXT: Spill Code Placement Analysis +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: Greedy Register Allocator +; CHECK-NEXT: Virtual Register Rewriter +; CHECK-NEXT: Stack Slot Coloring +; CHECK-NEXT: Machine Copy Propagation Pass +; CHECK-NEXT: Machine Loop Invariant Code Motion +; CHECK-NEXT: AArch64 Redundant Copy Elimination +; CHECK-NEXT: A57 FP Anti-dependency breaker +; CHECK-NEXT: PostRA Machine Sink +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: Shrink Wrapping analysis +; CHECK-NEXT: Prologue/Epilogue Insertion & Frame 
Finalization +; CHECK-NEXT: Control Flow Optimizer +; CHECK-NEXT: Tail Duplication +; CHECK-NEXT: Machine Copy Propagation Pass +; CHECK-NEXT: Post-RA pseudo instruction expansion pass +; CHECK-NEXT: AArch64 pseudo instruction expansion pass +; CHECK-NEXT: AArch64 load / store optimization pass +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Falkor HW Prefetch Fix Late Phase +; CHECK-NEXT: PostRA Machine Instruction Scheduler +; CHECK-NEXT: Analyze Machine Code For Garbage Collection +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Branch Probability Basic Block Placement +; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: StackMap Liveness Analysis +; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: Insert fentry calls +; CHECK-NEXT: Insert XRay ops +; CHECK-NEXT: Implement the 'patchable-function' attribute +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: AArch64 Assembly Printer +; CHECK-NEXT: Free MachineFunction +; CHECK-NEXT: Pass Arguments: -domtree +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction + +define void @f() { + ret void +} diff --git a/test/CodeGen/AArch64/PBQP-csr.ll b/test/CodeGen/AArch64/PBQP-csr.ll index 16d7f8cb7a5a..e071eda17e35 100644 --- a/test/CodeGen/AArch64/PBQP-csr.ll +++ b/test/CodeGen/AArch64/PBQP-csr.ll @@ -22,7 +22,7 @@ entry: %z.i60 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 2 %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0 %0 = bitcast double* %x.i to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 72, i1 false) %1 = load i32, i32* %na, align 4 %cmp70 = icmp sgt i32 %1, 0 br i1 %cmp70, label %for.body.lr.ph, label %for.end @@ -87,5 +87,5 @@ 
for.end: ; preds = %for.end.loopexit, % } ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll index d12c4c6f9fae..d242aab085d8 100644 --- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll +++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll @@ -47,7 +47,7 @@ attributes #1 = { nounwind readnone } !0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) !1 = !DIFile(filename: "test.c", directory: "") !2 = !{} -!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 141, file: !1, scope: !1, type: !6, variables: !12) +!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 141, file: !1, scope: !1, type: !6, retainedNodes: !12) !6 = !DISubroutineType(types: !7) !7 = !{null, !8} !8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9) diff --git a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll index fb4df34df298..043ce0933a9b 100644 --- a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll +++ b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll @@ -6,13 +6,13 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" declare void @extern(i8*) ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 +declare void 
@llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 ; Function Attrs: nounwind define void @func(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 { bb: %tmp = getelementptr inbounds i8, i8* %arg2, i64 88 - tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false) store i8 0, i8* %arg3 store i8 2, i8* %arg2 store float 0.000000e+00, float* %arg @@ -27,7 +27,7 @@ bb: define void @func2(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 { bb: %tmp = getelementptr inbounds i8, i8* %arg2, i64 88 - tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false) store i8 0, i8* %arg3 store i8 2, i8* %arg2 store float 0.000000e+00, float* %arg diff --git a/test/CodeGen/AArch64/aarch64-be-bv.ll b/test/CodeGen/AArch64/aarch64-be-bv.ll index 163a86b9ae4c..54b7c8ff414b 100644 --- a/test/CodeGen/AArch64/aarch64-be-bv.ll +++ b/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -3,375 +3,375 @@ @vec_v8i16 = global <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8> ; CHECK-LABEL: movi_modimm_t1: -define i16 @movi_modimm_t1() nounwind { +define void @movi_modimm_t1() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t2: -define i16 @movi_modimm_t2() nounwind { +define void @movi_modimm_t2() nounwind { ; 
CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #8 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t3: -define i16 @movi_modimm_t3() nounwind { +define void @movi_modimm_t3() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #16 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t4: -define i16 @movi_modimm_t4() nounwind { +define void @movi_modimm_t4() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #24 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t5: -define i16 @movi_modimm_t5() nounwind { +define void @movi_modimm_t5() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #1 ; CHECK-NEXT: add v[[REG1]].8h, 
v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t6: -define i16 @movi_modimm_t6() nounwind { +define void @movi_modimm_t6() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #1, lsl #8 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t7: -define i16 @movi_modimm_t7() nounwind { +define void @movi_modimm_t7() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, msl #8 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 511, i16 0, i16 511, i16 0, i16 511, i16 0, i16 511, i16 0> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t8: -define i16 @movi_modimm_t8() nounwind { +define void @movi_modimm_t8() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, msl #16 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, 
[x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t9: -define i16 @movi_modimm_t9() nounwind { +define void @movi_modimm_t9() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #1 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: movi_modimm_t10: -define i16 @movi_modimm_t10() nounwind { +define void @movi_modimm_t10() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: fmov_modimm_t11: -define i16 @fmov_modimm_t11() nounwind { +define void @fmov_modimm_t11() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].4s, #3.00000000 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 0, i16 
16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: fmov_modimm_t12: -define i16 @fmov_modimm_t12() nounwind { +define void @fmov_modimm_t12() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].2d, #0.17968750 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t1: -define i16 @mvni_modimm_t1() nounwind { +define void @mvni_modimm_t1() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t2: -define i16 @mvni_modimm_t2() nounwind { +define void @mvni_modimm_t2() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #8 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535> - %el = 
extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t3: -define i16 @mvni_modimm_t3() nounwind { +define void @mvni_modimm_t3() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #16 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t4: -define i16 @mvni_modimm_t4() nounwind { +define void @mvni_modimm_t4() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #24 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t5: -define i16 @mvni_modimm_t5() nounwind { +define void @mvni_modimm_t5() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #1 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + 
store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t6: -define i16 @mvni_modimm_t6() nounwind { +define void @mvni_modimm_t6() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #1, lsl #8 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t7: -define i16 @mvni_modimm_t7() nounwind { +define void @mvni_modimm_t7() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, msl #8 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: mvni_modimm_t8: -define i16 @mvni_modimm_t8() nounwind { +define void @mvni_modimm_t8() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, msl #16 ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = add <8 x i16> %in, <i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; 
CHECK-LABEL: bic_modimm_t1: -define i16 @bic_modimm_t1() nounwind { +define void @bic_modimm_t1() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = and <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: bic_modimm_t2: -define i16 @bic_modimm_t2() nounwind { +define void @bic_modimm_t2() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1, lsl #8 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = and <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: bic_modimm_t3: -define i16 @bic_modimm_t3() nounwind { +define void @bic_modimm_t3() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1, lsl #16 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = and <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: bic_modimm_t4: -define i16 @bic_modimm_t4() nounwind { +define void @bic_modimm_t4() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #1, lsl #24 - ; CHECK-NEXT: umov 
w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = and <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: bic_modimm_t5: -define i16 @bic_modimm_t5() nounwind { +define void @bic_modimm_t5() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #1 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = and <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: bic_modimm_t6: -define i16 @bic_modimm_t6() nounwind { +define void @bic_modimm_t6() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #1, lsl #8 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = and <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: orr_modimm_t1: -define i16 @orr_modimm_t1() nounwind { +define void @orr_modimm_t1() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = or <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0> - %el = extractelement <8 x i16> %rv, i32 0 - 
ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: orr_modimm_t2: -define i16 @orr_modimm_t2() nounwind { +define void @orr_modimm_t2() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1, lsl #8 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = or <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: orr_modimm_t3: -define i16 @orr_modimm_t3() nounwind { +define void @orr_modimm_t3() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1, lsl #16 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = or <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: orr_modimm_t4: -define i16 @orr_modimm_t4() nounwind { +define void @orr_modimm_t4() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #1, lsl #24 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = or <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: orr_modimm_t5: -define i16 @orr_modimm_t5() nounwind { +define void @orr_modimm_t5() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #1 - ; CHECK-NEXT: umov w{{[0-9]+}}, 
v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = or <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } ; CHECK-LABEL: orr_modimm_t6: -define i16 @orr_modimm_t6() nounwind { +define void @orr_modimm_t6() nounwind { ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] ; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #1, lsl #8 - ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + ; CHECK-NEXT: st1 { v[[REG1]].8h }, [x{{[0-9]+}}] %in = load <8 x i16>, <8 x i16>* @vec_v8i16 %rv = or <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256> - %el = extractelement <8 x i16> %rv, i32 0 - ret i16 %el + store <8 x i16> %rv, <8 x i16>* @vec_v8i16 + ret void } declare i8 @f_v8i8(<8 x i8> %arg) diff --git a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir index c9ff2cd0d514..c69967d94154 100644 --- a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir +++ b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -1,7 +1,7 @@ -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math 
-machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m1 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s # name: f1_2s registers: @@ -12,13 +12,13 @@ registers: - { id: 4, class: fpr64 } body: | bb.0.entry: - %2:fpr64 = COPY %d2 - %1:fpr64 = COPY %d1 - %0:fpr64 = COPY %d0 + %2:fpr64 = COPY $d2 + %1:fpr64 = COPY $d1 + %0:fpr64 = COPY $d0 %3:fpr64 = FMULv2f32 %0, %1 %4:fpr64 = FSUBv2f32 killed %3, %2 - %d0 = COPY %4 - RET_ReallyLR implicit %d0 + $d0 = COPY %4 + RET_ReallyLR implicit $d0 ... # UNPROFITABLE-LABEL: name: f1_2s @@ -26,8 +26,8 @@ body: | # UNPROFITABLE-NEXT: FSUBv2f32 killed %3, %2 # # PROFITABLE-LABEL: name: f1_2s -# PROFITABLE: %5:fpr64 = FNEGv2f32 %2 -# PROFITABLE-NEXT: FMLAv2f32 killed %5, %0, %1 +# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 +# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1 --- name: f1_4s registers: @@ -38,13 +38,13 @@ registers: - { id: 4, class: fpr128 } body: | bb.0.entry: - %2:fpr128 = COPY %q2 - %1:fpr128 = COPY %q1 - %0:fpr128 = COPY %q0 + %2:fpr128 = COPY $q2 + %1:fpr128 = COPY $q1 + %0:fpr128 = COPY $q0 %3:fpr128 = FMULv4f32 %0, %1 %4:fpr128 = FSUBv4f32 killed %3, %2 - %q0 = COPY %4 - RET_ReallyLR implicit %q0 + $q0 = COPY %4 + RET_ReallyLR implicit $q0 ... 
# UNPROFITABLE-LABEL: name: f1_4s @@ -52,8 +52,8 @@ body: | # UNPROFITABLE-NEXT: FSUBv4f32 killed %3, %2 # # PROFITABLE-LABEL: name: f1_4s -# PROFITABLE: %5:fpr128 = FNEGv4f32 %2 -# PROFITABLE-NEXT: FMLAv4f32 killed %5, %0, %1 +# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2 +# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1 --- name: f1_2d registers: @@ -64,13 +64,13 @@ registers: - { id: 4, class: fpr128 } body: | bb.0.entry: - %2:fpr128 = COPY %q2 - %1:fpr128 = COPY %q1 - %0:fpr128 = COPY %q0 + %2:fpr128 = COPY $q2 + %1:fpr128 = COPY $q1 + %0:fpr128 = COPY $q0 %3:fpr128 = FMULv2f64 %0, %1 %4:fpr128 = FSUBv2f64 killed %3, %2 - %q0 = COPY %4 - RET_ReallyLR implicit %q0 + $q0 = COPY %4 + RET_ReallyLR implicit $q0 ... # UNPROFITABLE-LABEL: name: f1_2d @@ -78,8 +78,8 @@ body: | # UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2 # # PROFITABLE-LABEL: name: f1_2d -# PROFITABLE: %5:fpr128 = FNEGv2f64 %2 -# PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1 +# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2 +# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1 --- name: f1_both_fmul_2s registers: @@ -92,15 +92,15 @@ registers: - { id: 6, class: fpr64 } body: | bb.0.entry: - %3:fpr64 = COPY %q3 - %2:fpr64 = COPY %q2 - %1:fpr64 = COPY %q1 - %0:fpr64 = COPY %q0 + %3:fpr64 = COPY $q3 + %2:fpr64 = COPY $q2 + %1:fpr64 = COPY $q1 + %0:fpr64 = COPY $q0 %4:fpr64 = FMULv2f32 %0, %1 %5:fpr64 = FMULv2f32 %2, %3 %6:fpr64 = FSUBv2f32 killed %4, %5 - %q0 = COPY %6 - RET_ReallyLR implicit %q0 + $q0 = COPY %6 + RET_ReallyLR implicit $q0 ... 
# ALL-LABEL: name: f1_both_fmul_2s @@ -118,15 +118,15 @@ registers: - { id: 6, class: fpr128 } body: | bb.0.entry: - %3:fpr128 = COPY %q3 - %2:fpr128 = COPY %q2 - %1:fpr128 = COPY %q1 - %0:fpr128 = COPY %q0 + %3:fpr128 = COPY $q3 + %2:fpr128 = COPY $q2 + %1:fpr128 = COPY $q1 + %0:fpr128 = COPY $q0 %4:fpr128 = FMULv4f32 %0, %1 %5:fpr128 = FMULv4f32 %2, %3 %6:fpr128 = FSUBv4f32 killed %4, %5 - %q0 = COPY %6 - RET_ReallyLR implicit %q0 + $q0 = COPY %6 + RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_4s @@ -144,15 +144,15 @@ registers: - { id: 6, class: fpr128 } body: | bb.0.entry: - %3:fpr128 = COPY %q3 - %2:fpr128 = COPY %q2 - %1:fpr128 = COPY %q1 - %0:fpr128 = COPY %q0 + %3:fpr128 = COPY $q3 + %2:fpr128 = COPY $q2 + %1:fpr128 = COPY $q1 + %0:fpr128 = COPY $q0 %4:fpr128 = FMULv2f64 %0, %1 %5:fpr128 = FMULv2f64 %2, %3 %6:fpr128 = FSUBv2f64 killed %4, %5 - %q0 = COPY %6 - RET_ReallyLR implicit %q0 + $q0 = COPY %6 + RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_2d diff --git a/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/test/CodeGen/AArch64/aarch64-fold-lslfast.ll index 0dfe04b664d0..55ddaf8b65f1 100644 --- a/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -9,7 +9,8 @@ define i16 @halfword(%struct.a* %ctx, i32 %xor72) nounwind { ; CHECK-LABEL: halfword: ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 ; CHECK: ldrh [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #1] -; CHECK: strh [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #1] +; CHECK: mov [[REG3:x[0-9]+]], [[REG2]] +; CHECK: strh [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #1] %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -24,7 +25,8 @@ define i32 @word(%struct.b* %ctx, i32 %xor72) nounwind { ; CHECK-LABEL: word: ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 ; CHECK: ldr [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #2] -; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #2] +; 
CHECK: mov [[REG3:x[0-9]+]], [[REG2]] +; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #2] %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -39,7 +41,8 @@ define i64 @doubleword(%struct.c* %ctx, i32 %xor72) nounwind { ; CHECK-LABEL: doubleword: ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 ; CHECK: ldr [[REG1:x[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #3] -; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #3] +; CHECK: mov [[REG3:x[0-9]+]], [[REG2]] +; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #3] %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 diff --git a/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll b/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll new file mode 100644 index 000000000000..0337f04e5796 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-insert-subvector-undef.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s + +; Check that this does not ICE. + +@d = common dso_local local_unnamed_addr global <4 x i16> zeroinitializer, align 8 + +define <8 x i16> @c(i32 %e) { +entry: + %0 = load <4 x i16>, <4 x i16>* @d, align 8 + %vminv = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %0) + %1 = trunc i32 %vminv to i16 + %vecinit3 = insertelement <4 x i16> <i16 undef, i16 undef, i16 0, i16 0>, i16 %1, i32 1 + %call = tail call <8 x i16> @c(i32 0) #3 + %vgetq_lane = extractelement <8 x i16> %call, i32 0 + %vset_lane = insertelement <4 x i16> %vecinit3, i16 %vgetq_lane, i32 0 + %call4 = tail call i32 bitcast (i32 (...)* @k to i32 (<4 x i16>)*)(<4 x i16> %vset_lane) #3 + ret <8 x i16> undef +} + +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) +declare i32 @k(...) 
diff --git a/test/CodeGen/AArch64/aarch64-named-reg-w18.ll b/test/CodeGen/AArch64/aarch64-named-reg-w18.ll index 341c7683dbaa..0e6aef66eab7 100644 --- a/test/CodeGen/AArch64/aarch64-named-reg-w18.ll +++ b/test/CodeGen/AArch64/aarch64-named-reg-w18.ll @@ -1,10 +1,8 @@ -; RUN: not llc -mtriple=aarch64-fuchsia -o - %s 2>&1 | FileCheck %s --check-prefix=ERROR -; RUN: llc -mtriple=aarch64-fuchsia -mattr=+reserve-x18 -o - %s +; RUN: llc -mtriple=aarch64-fuchsia -o - %s define void @set_w18(i32 %x) { entry: ; FIXME: Include an allocatable-specific error message -; ERROR: Invalid register name "w18". tail call void @llvm.write_register.i32(metadata !0, i32 %x) ret void } diff --git a/test/CodeGen/AArch64/aarch64-named-reg-x18.ll b/test/CodeGen/AArch64/aarch64-named-reg-x18.ll index eed852710ba0..9074f2c108af 100644 --- a/test/CodeGen/AArch64/aarch64-named-reg-x18.ll +++ b/test/CodeGen/AArch64/aarch64-named-reg-x18.ll @@ -1,10 +1,8 @@ -; RUN: not llc -mtriple=aarch64-fuchsia -o - %s 2>&1 | FileCheck %s --check-prefix=ERROR -; RUN: llc -mtriple=aarch64-fuchsia -mattr=+reserve-x18 -o - %s +; RUN: llc -mtriple=aarch64-fuchsia -o - %s define void @set_x18(i64 %x) { entry: ; FIXME: Include an allocatable-specific error message -; ERROR: Invalid register name "x18". 
tail call void @llvm.write_register.i64(metadata !0, i64 %x) ret void } diff --git a/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/test/CodeGen/AArch64/aarch64-stp-cluster.ll index c6bdbe4f0322..6d9b7d4bb4fc 100644 --- a/test/CodeGen/AArch64/aarch64-stp-cluster.ll +++ b/test/CodeGen/AArch64/aarch64-stp-cluster.ll @@ -5,10 +5,10 @@ ; CHECK-LABEL: stp_i64_scale:%bb.0 ; CHECK:Cluster ld/st SU(4) - SU(3) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(4): STRXui %1, %0, 1 -; CHECK:SU(3): STRXui %1, %0, 2 -; CHECK:SU(2): STRXui %1, %0, 3 -; CHECK:SU(5): STRXui %1, %0, 4 +; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1 +; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2 +; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3 +; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4 define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) { entry: %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 @@ -26,10 +26,10 @@ entry: ; CHECK-LABEL: stp_i32_scale:%bb.0 ; CHECK:Cluster ld/st SU(4) - SU(3) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(4): STRWui %1, %0, 1 -; CHECK:SU(3): STRWui %1, %0, 2 -; CHECK:SU(2): STRWui %1, %0, 3 -; CHECK:SU(5): STRWui %1, %0, 4 +; CHECK:SU(4): STRWui %1:gpr32, %0:gpr64common, 1 +; CHECK:SU(3): STRWui %1:gpr32, %0:gpr64common, 2 +; CHECK:SU(2): STRWui %1:gpr32, %0:gpr64common, 3 +; CHECK:SU(5): STRWui %1:gpr32, %0:gpr64common, 4 define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) { entry: %arrayidx = getelementptr inbounds i32, i32* %P, i32 3 @@ -47,10 +47,10 @@ entry: ; CHECK-LABEL:stp_i64_unscale:%bb.0 entry ; CHECK:Cluster ld/st SU(5) - SU(2) ; CHECK:Cluster ld/st SU(4) - SU(3) -; CHECK:SU(5): STURXi %1, %0, -32 -; CHECK:SU(2): STURXi %1, %0, -24 -; CHECK:SU(4): STURXi %1, %0, -16 -; CHECK:SU(3): STURXi %1, %0, -8 +; CHECK:SU(5): STURXi %1:gpr64, %0:gpr64common, -32 +; CHECK:SU(2): STURXi %1:gpr64, %0:gpr64common, -24 +; CHECK:SU(4): STURXi %1:gpr64, %0:gpr64common, -16 +; CHECK:SU(3): STURXi %1:gpr64, %0:gpr64common, -8 define void 
@stp_i64_unscale(i64* nocapture %P, i64 %v) #0 { entry: %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 @@ -68,10 +68,10 @@ entry: ; CHECK-LABEL:stp_i32_unscale:%bb.0 entry ; CHECK:Cluster ld/st SU(5) - SU(2) ; CHECK:Cluster ld/st SU(4) - SU(3) -; CHECK:SU(5): STURWi %1, %0, -16 -; CHECK:SU(2): STURWi %1, %0, -12 -; CHECK:SU(4): STURWi %1, %0, -8 -; CHECK:SU(3): STURWi %1, %0, -4 +; CHECK:SU(5): STURWi %1:gpr32, %0:gpr64common, -16 +; CHECK:SU(2): STURWi %1:gpr32, %0:gpr64common, -12 +; CHECK:SU(4): STURWi %1:gpr32, %0:gpr64common, -8 +; CHECK:SU(3): STURWi %1:gpr32, %0:gpr64common, -4 define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 { entry: %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 @@ -89,10 +89,10 @@ entry: ; CHECK-LABEL:stp_double:%bb.0 ; CHECK:Cluster ld/st SU(3) - SU(4) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(3): STRDui %1, %0, 1 -; CHECK:SU(4): STRDui %1, %0, 2 -; CHECK:SU(2): STRDui %1, %0, 3 -; CHECK:SU(5): STRDui %1, %0, 4 +; CHECK:SU(3): STRDui %1:fpr64, %0:gpr64common, 1 +; CHECK:SU(4): STRDui %1:fpr64, %0:gpr64common, 2 +; CHECK:SU(2): STRDui %1:fpr64, %0:gpr64common, 3 +; CHECK:SU(5): STRDui %1:fpr64, %0:gpr64common, 4 define void @stp_double(double* nocapture %P, double %v) { entry: %arrayidx = getelementptr inbounds double, double* %P, i64 3 @@ -110,10 +110,10 @@ entry: ; CHECK-LABEL:stp_float:%bb.0 ; CHECK:Cluster ld/st SU(3) - SU(4) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(3): STRSui %1, %0, 1 -; CHECK:SU(4): STRSui %1, %0, 2 -; CHECK:SU(2): STRSui %1, %0, 3 -; CHECK:SU(5): STRSui %1, %0, 4 +; CHECK:SU(3): STRSui %1:fpr32, %0:gpr64common, 1 +; CHECK:SU(4): STRSui %1:fpr32, %0:gpr64common, 2 +; CHECK:SU(2): STRSui %1:fpr32, %0:gpr64common, 3 +; CHECK:SU(5): STRSui %1:fpr32, %0:gpr64common, 4 define void @stp_float(float* nocapture %P, float %v) { entry: %arrayidx = getelementptr inbounds float, float* %P, i64 3 @@ -130,10 +130,10 @@ entry: ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: 
stp_volatile:%bb.0 ; CHECK-NOT: Cluster ld/st -; CHECK:SU(2): STRXui %1, %0, 3; mem:Volatile -; CHECK:SU(3): STRXui %1, %0, 2; mem:Volatile -; CHECK:SU(4): STRXui %1, %0, 1; mem:Volatile -; CHECK:SU(5): STRXui %1, %0, 4; mem:Volatile +; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3 :: (volatile +; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2 :: (volatile +; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1 :: (volatile +; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4 :: (volatile define i64 @stp_volatile(i64* nocapture %P, i64 %v) { entry: %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 diff --git a/test/CodeGen/AArch64/aarch64-vuzp.ll b/test/CodeGen/AArch64/aarch64-vuzp.ll new file mode 100644 index 000000000000..a7b20f25557c --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-vuzp.ll @@ -0,0 +1,70 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) + +; CHECK-LABEL: fun1: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +define i32 @fun1() { +entry: + %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef) + %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1 + store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1 + ret i32 undef +} + +; CHECK-LABEL: fun2: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +define i32 @fun2() { +entry: + %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef) + %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, 
i32 11, i32 13, i32 15> + %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1 + store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1 + ret i32 undef +} + +; CHECK-LABEL: fun3: +; CHECK-NOT: uzp1 +define i32 @fun3() { +entry: + %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef) + %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 15> + %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1 + store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1 + ret i32 undef +} + +; CHECK-LABEL: fun4: +; CHECK-NOT: uzp2 +define i32 @fun4() { +entry: + %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef) + %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 3, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1 + store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1 + ret i32 undef +} + +; CHECK-LABEL: pr36582: +; Check that this does not ICE. +define void @pr36582(i8* %p1, i32* %p2) { +entry: + %x = bitcast i8* %p1 to <8 x i8>* + %wide.vec = load <8 x i8>, <8 x i8>* %x, align 1 + %strided.vec = shufflevector <8 x i8> %wide.vec, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %y = zext <4 x i8> %strided.vec to <4 x i32> + %z = bitcast i32* %p2 to <4 x i32>* + store <4 x i32> %y, <4 x i32>* %z, align 4 + ret void +} + +; Check that this pattern is recognized as a VZIP and +; that the vector blend transform does not scramble the pattern. 
+; CHECK-LABEL: vzipNoBlend: +; CHECK: zip1 +define <8 x i8> @vzipNoBlend(<8 x i8>* %A, <8 x i16>* %B) nounwind { + %t = load <8 x i8>, <8 x i8>* %A + %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i8> %vzip +} diff --git a/test/CodeGen/AArch64/aarch64_f16_be.ll b/test/CodeGen/AArch64/aarch64_f16_be.ll index 7504439bab80..b51798be1697 100644 --- a/test/CodeGen/AArch64/aarch64_f16_be.ll +++ b/test/CodeGen/AArch64/aarch64_f16_be.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 -fast-isel < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 -fast-isel < %s | FileCheck %s --check-prefix=CHECK-BE define void @test_bitcast_v8f16_to_v4f32(<8 x half> %a) { ; CHECK-LABEL: test_bitcast_v8f16_to_v4f32: diff --git a/test/CodeGen/AArch64/aarch64_tree_tests.ll b/test/CodeGen/AArch64/aarch64_tree_tests.ll index 08e506a66d5e..0a06765a8f75 100644 --- a/test/CodeGen/AArch64/aarch64_tree_tests.ll +++ b/test/CodeGen/AArch64/aarch64_tree_tests.ll @@ -4,6 +4,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "arm64--linux-gnu" +; FIXME: Misspelled CHECK-LABEL ; CHECK-LABLE: @aarch64_tree_tests_and ; CHECK: .hword 32768 ; CHECK: .hword 32767 @@ -22,6 +23,7 @@ entry: ret <8 x i16> %ret } +; FIXME: Misspelled CHECK-LABEL ; CHECK-LABLE: @aarch64_tree_tests_or ; CHECK: .hword 32768 ; CHECK: .hword 32766 diff --git a/test/CodeGen/AArch64/and-mask-removal.ll b/test/CodeGen/AArch64/and-mask-removal.ll index 8291516d81ea..c02bc881cd33 100644 --- a/test/CodeGen/AArch64/and-mask-removal.ll +++ b/test/CodeGen/AArch64/and-mask-removal.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel=false -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc 
-mtriple=arm64-apple-darwin < %s | FileCheck %s @board = common global [400 x i8] zeroinitializer, align 1 @next_string = common global i32 0, align 4 diff --git a/test/CodeGen/AArch64/andandshift.ll b/test/CodeGen/AArch64/andandshift.ll index e2c7a098908e..e6019b36d597 100644 --- a/test/CodeGen/AArch64/andandshift.ll +++ b/test/CodeGen/AArch64/andandshift.ll @@ -4,7 +4,7 @@ target triple = "arm64--linux-gnu" ; Function Attrs: nounwind readnone define i32 @test1(i8 %a) { -; CHECK-LABLE: @test1 +; CHECK-LABEL: @test1 ; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5 entry: %conv = zext i8 %a to i32 @@ -14,7 +14,7 @@ entry: ; Function Attrs: nounwind readnone define i32 @test2(i8 %a) { -; CHECK-LABLE: @test2 +; CHECK-LABEL: @test2 ; CHECK: and {{w[0-9]+}}, w0, #0xff ; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5 entry: diff --git a/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll index b38b4f2a2b22..262944498c0c 100644 --- a/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll +++ b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll @@ -8,14 +8,14 @@ ; CHECK: adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}} ; CHECK: add x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}} +; CHECK-NEXT: ldr [[VAL2:x[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: ldr [[VAL:w[0-9]+]], [x[[ADDR]], #8] ; CHECK-NEXT: str [[VAL]], [x0, #8] -; CHECK-NEXT: ldr [[VAL2:x[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: str [[VAL2]], [x0] define void @foo(i8* %a) { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll 
b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll index 72e5ec6b89b5..949de1a7c637 100644 --- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -8,15 +8,9 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK: add.2d v[[REG:[0-9]+]], v0, v1 ; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1 ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1 -; Without advanced copy optimization, we end up with cross register -; banks copies that cannot be coalesced. -; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]] -; With advanced copy optimization, we end up with just one copy -; to insert the computed high part into the V register. -; CHECK-OPT-NOT: fmov +; CHECK-NOT: fmov ; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]] -; CHECK-NOOPT: fmov d0, [[COPY_REG3]] -; CHECK-OPT-NOT: fmov +; CHECK-NOT: fmov ; CHECK: mov.d v0[1], [[COPY_REG2]] ; CHECK-NEXT: ret ; @@ -24,11 +18,9 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d ; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1 ; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1 -; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]] -; GENERIC-OPT-NOT: fmov +; GENERIC-NOT: fmov ; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]] -; GENERIC-NOOPT: fmov d0, [[COPY_REG3]] -; GENERIC-OPT-NOT: fmov +; GENERIC-NOT: fmov ; GENERIC: mov v0.d[1], [[COPY_REG2]] ; GENERIC-NEXT: ret %add = add <2 x i64> %a, %b diff --git a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll index 5a1eabc2ee6c..a1002989165c 100644 --- a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll +++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s ; The following 2 test cases test shufflevector with 
beginning UNDEF mask. define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) { diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll index 441f45bf90b3..c13f6503aef4 100644 --- a/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-aapcs.ll @@ -5,55 +5,54 @@ ; CHECK-LABEL: @test_i128_align define i128 @test_i128_align(i32, i128 %arg, i32 %after) { store i32 %after, i32* @var, align 4 -; CHECK: str w4, [{{x[0-9]+}}, :lo12:var] +; CHECK-DAG: str w4, [{{x[0-9]+}}, :lo12:var] ret i128 %arg -; CHECK: mov x0, x2 -; CHECK: mov x1, x3 +; CHECK-DAG: mov x0, x2 +; CHECK-DAG: mov x1, x3 } ; CHECK-LABEL: @test_i64x2_align define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) { store i32 %after, i32* @var, align 4 -; CHECK: str w3, [{{x[0-9]+}}, :lo12:var] +; CHECK-DAG: str w3, [{{x[0-9]+}}, :lo12:var] ret [2 x i64] %arg -; CHECK: mov x0, x1 +; CHECK-DAG: mov x0, x1 ; CHECK: mov x1, x2 } @var64 = global i64 0, align 8 - ; Check stack slots are 64-bit at all times. +; Check stack slots are 64-bit at all times. define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, i32 %int, i64 %long) { +; CHECK-LABEL: test_stack_slots: +; CHECK-DAG: ldr w[[ext1:[0-9]+]], [sp, #24] +; CHECK-DAG: ldrh w[[ext2:[0-9]+]], [sp, #16] +; CHECK-DAG: ldrb w[[ext3:[0-9]+]], [sp, #8] +; CHECK-DAG: ldr x[[ext4:[0-9]+]], [sp, #32] +; CHECK-DAG: ldrb w[[ext5:[0-9]+]], [sp] +; CHECK-DAG: and x[[ext5]], x[[ext5]], #0x1 + %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64, align 8 -; CHECK: ldrb w[[EXT:[0-9]+]], [sp] - - ; Part of last store. Blasted scheduler. 
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] - -; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1 -; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: str x[[ext5]], [{{x[0-9]+}}, :lo12:var64] %ext_char = zext i8 %char to i64 store volatile i64 %ext_char, i64* @var64, align 8 -; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8] -; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: str x[[ext3]], [{{x[0-9]+}}, :lo12:var64] %ext_short = zext i16 %short to i64 store volatile i64 %ext_short, i64* @var64, align 8 -; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16] -; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: str x[[ext2]], [{{x[0-9]+}}, :lo12:var64] %ext_int = zext i32 %int to i64 store volatile i64 %ext_int, i64* @var64, align 8 -; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24] -; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: str x[[ext1]], [{{x[0-9]+}}, :lo12:var64] store volatile i64 %long, i64* @var64, align 8 -; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: str x[[ext4]], [{{x[0-9]+}}, :lo12:var64] ret void } @@ -64,8 +63,8 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64 -; CHECK: and w[[EXT:[0-9]+]], w0, #0x1 -; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: and [[EXT:x[0-9]+]], x0, #0x1 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] %ext_char = sext i8 %char to i64 store volatile i64 %ext_char, i64* @var64 @@ -74,8 +73,8 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_short = zext i16 %short to i64 store volatile i64 %ext_short, i64* @var64 -; CHECK: and w[[EXT:[0-9]+]], w2, #0xffff -; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] %ext_int = zext i32 %int to i64 store volatile i64 %ext_int, i64* @var64 diff --git 
a/test/CodeGen/AArch64/arm64-abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll index d6a1686d5663..92c320a82102 100644 --- a/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -13,9 +13,8 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, ; CHECK: ldr {{w[0-9]+}}, [sp, #72] ; Second vararg ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 ; Third vararg -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -164,10 +163,10 @@ entry: %4 = bitcast i8* %ap.align to %struct.s41* %5 = bitcast %struct.s41* %vs to i8* %6 = bitcast %struct.s41* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @bar2(i32 %x, i128 %s41.coerce) nounwind { entry: diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index 5be84b7d493b..bfc03c6b9757 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false < %s | FileCheck %s -; RUN: llc -O0 -mtriple=arm64-apple-darwin < %s | FileCheck --check-prefix=FAST %s +; RUN: llc -O0 -fast-isel -mtriple=arm64-apple-darwin < %s | FileCheck --check-prefix=FAST %s ; rdar://9932559 define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline { diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll 
b/test/CodeGen/AArch64/arm64-abi_align.ll index b844aab5628c..85fd1eaa9c0b 100644 --- a/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/test/CodeGen/AArch64/arm64-abi_align.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s +; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s +; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim -fast-isel | FileCheck -check-prefix=FAST %s ; rdar://12648441 ; Generated from arm64-arguments.c with -O2. @@ -290,24 +290,25 @@ entry: ; Space for s2 is allocated at sp ; FAST-LABEL: caller42 -; FAST: sub sp, sp, #112 -; Space for s1 is allocated at fp-24 = sp+72 -; Space for s2 is allocated at sp+48 +; FAST: sub sp, sp, #96 +; Space for s1 is allocated at fp-24 = sp+56 ; FAST: sub x[[A:[0-9]+]], x29, #24 -; FAST: add x[[A:[0-9]+]], sp, #48 ; Call memcpy with size = 24 (0x18) ; FAST: orr {{x[0-9]+}}, xzr, #0x18 +; Space for s2 is allocated at sp+32 +; FAST: add x[[A:[0-9]+]], sp, #32 +; FAST: bl _memcpy %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s42* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4 %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* 
%tmp1) #5 ret i32 %call } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1, @@ -334,21 +335,24 @@ entry: ; FAST-LABEL: caller42_stack ; Space for s1 is allocated at fp-24 -; Space for s2 is allocated at fp-48 ; FAST: sub x[[A:[0-9]+]], x29, #24 -; FAST: sub x[[B:[0-9]+]], x29, #48 ; Call memcpy with size = 24 (0x18) ; FAST: orr {{x[0-9]+}}, xzr, #0x18 -; FAST: str {{w[0-9]+}}, [sp] +; FAST: bl _memcpy +; Space for s2 is allocated at fp-48 +; FAST: sub x[[B:[0-9]+]], x29, #48 +; Call memcpy again +; FAST: bl _memcpy ; Address of s1 is passed on stack at sp+8 +; FAST: str {{w[0-9]+}}, [sp] ; FAST: str {{x[0-9]+}}, [sp, #8] ; FAST: str {{x[0-9]+}}, [sp, #16] %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s42* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4 %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5 ret i32 %call @@ -401,8 +405,6 @@ entry: ; FAST: add x29, sp, #64 ; Space for s1 is allocated at sp+32 ; Space for s2 is allocated at sp -; FAST: add x1, sp, #32 -; FAST: mov x2, sp ; FAST: str {{x[0-9]+}}, [sp, #32] ; FAST: str {{x[0-9]+}}, [sp, #40] ; FAST: str 
{{x[0-9]+}}, [sp, #48] @@ -411,12 +413,14 @@ entry: ; FAST: str {{x[0-9]+}}, [sp, #8] ; FAST: str {{x[0-9]+}}, [sp, #16] ; FAST: str {{x[0-9]+}}, [sp, #24] +; FAST: add x1, sp, #32 +; FAST: mov x2, sp %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s43* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4 %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5 ret i32 %call } @@ -448,8 +452,6 @@ entry: ; FAST: sub sp, sp, #112 ; Space for s1 is allocated at fp-32 = sp+64 ; Space for s2 is allocated at sp+32 -; FAST: sub x[[A:[0-9]+]], x29, #32 -; FAST: add x[[B:[0-9]+]], sp, #32 ; FAST: stur {{x[0-9]+}}, [x29, #-32] ; FAST: stur {{x[0-9]+}}, [x29, #-24] ; FAST: stur {{x[0-9]+}}, [x29, #-16] @@ -460,14 +462,16 @@ entry: ; FAST: str {{x[0-9]+}}, [sp, #56] ; FAST: str {{w[0-9]+}}, [sp] ; Address of s1 is passed on stack at sp+8 -; FAST: str {{x[0-9]+}}, [sp, #8] -; FAST: str {{x[0-9]+}}, [sp, #16] +; FAST: sub x[[A:[0-9]+]], x29, #32 +; FAST: str x[[A]], [sp, #8] +; FAST: add x[[B:[0-9]+]], sp, #32 +; FAST: str x[[B]], [sp, #16] %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, 
i1 false), !tbaa.struct !4 %1 = bitcast %struct.s43* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4 %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5 ret i32 %call diff --git a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll index 6eaf75c4fb96..da1f366757a8 100644 --- a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll +++ b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll @@ -8,7 +8,7 @@ define i32 @fct(i32 %i1, i32 %i2) { ; Sign extension is used more than once, thus it should not be folded. ; CodeGenPrepare is not sharing sext across uses, thus this is folded because ; of that. -; _CHECK-NOT_: , sxtw] +; _CHECK-NOT: , sxtw] entry: %idxprom = sext i32 %i1 to i64 %0 = load i8*, i8** @block, align 8 diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll index 6da767921632..16f8d0160633 100644 --- a/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/test/CodeGen/AArch64/arm64-addrmode.ll @@ -5,32 +5,31 @@ ; base + offset (imm9) ; CHECK: @t1 -; CHECK: ldr xzr, [x{{[0-9]+}}, #8] +; CHECK: ldr xzr, [x0, #8] ; CHECK: ret -define void @t1() { - %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1 +define void @t1(i64* %object) { + %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1 %tmp = load volatile i64, i64* %incdec.ptr, align 8 ret void } ; base + offset (> imm9) ; CHECK: @t2 -; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264 +; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264 ; CHECK: ldr xzr, [ -; CHECK: [[ADDREG]]] ; CHECK: ret -define void @t2() { - %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33 +define void @t2(i64* %object) { + 
%incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33 %tmp = load volatile i64, i64* %incdec.ptr, align 8 ret void } ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes) ; CHECK: @t3 -; CHECK: ldr xzr, [x{{[0-9]+}}, #32760] +; CHECK: ldr xzr, [x0, #32760] ; CHECK: ret -define void @t3() { - %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095 +define void @t3(i64* %object) { + %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095 %tmp = load volatile i64, i64* %incdec.ptr, align 8 ret void } @@ -38,10 +37,10 @@ define void @t3() { ; base + unsigned offset (> imm12 * size of type in bytes) ; CHECK: @t4 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000 -; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]] +; CHECK: ldr xzr, [x0, x[[NUM]]] ; CHECK: ret -define void @t4() { - %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096 +define void @t4(i64* %object) { + %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096 %tmp = load volatile i64, i64* %incdec.ptr, align 8 ret void } @@ -58,12 +57,12 @@ define void @t5(i64 %a) { ; base + reg + imm ; CHECK: @t6 -; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3 +; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]] ; CHECK: ret -define void @t6(i64 %a) { - %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a +define void @t6(i64 %a, i64* %object) { + %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096 %tmp = load volatile i64, i64* %incdec.ptr, align 8 ret void diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll index fdb379871048..45e32c8aa632 100644 --- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=arm64-eabi 
-mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK-DAG: str w[[REG0:[0-9]+]], [x19, #132] -; CHECK-DAG: str w[[REG0]], [x19, #264] +; CHECK-DAG: stur w[[REG0:[0-9]+]], [x29, #-24] +; CHECK-DAG: stur w[[REG0]], [x29, #-20] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 %a.addr = alloca i32, align 4 diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll index bf4990d3c9b5..3c7d43eeb1f6 100644 --- a/test/CodeGen/AArch64/arm64-arith.ll +++ b/test/CodeGen/AArch64/arm64-arith.ll @@ -123,8 +123,7 @@ entry: define i64 @t14(i16 %a, i64 %x) nounwind ssp { entry: ; CHECK-LABEL: t14: -; CHECK: and w8, w0, #0xffff -; CHECK: add x0, x1, w8, uxtw #3 +; CHECK: add x0, x1, w0, uxth #3 ; CHECK: ret %c = zext i16 %a to i64 %d = shl i64 %c, 3 diff --git a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll index 6f88212cd39d..5cd96d09b14d 100644 --- a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll @@ -51,6 +51,21 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) { ret void } +; CHECK-LABEL: test_i64_v4f16: +define void @test_i64_v4f16(<4 x half>* %p, i64* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to i64 + %4 = add i64 %3, %3 + store i64 %4, i64* %q + ret void +} + ; CHECK-LABEL: test_i64_v4i16: define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } @@ -140,6 +155,22 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) { ret void } +; CHECK-LABEL: test_f64_v4f16: +define void @test_f64_v4f16(<4 x half>* %p, double* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: fadd +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x 
half> %1, %1 + %3 = bitcast <4 x half> %2 to double + %4 = fadd double %3, %3 + store double %4, double* %q + ret void +} + ; CHECK-LABEL: test_f64_v8i8: define void @test_f64_v8i8(<8 x i8>* %p, double* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -203,6 +234,21 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) { ret void } +; CHECK-LABEL: test_v1i64_v4f16: +define void @test_v1i64_v4f16(<4 x half>* %p, <1 x i64>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <1 x i64> + %4 = add <1 x i64> %3, %3 + store <1 x i64> %4, <1 x i64>* %q + ret void +} + ; CHECK-LABEL: test_v1i64_v4i16: define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } @@ -293,6 +339,21 @@ define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) { ret void } +; CHECK-LABEL: test_v2f32_v4f16: +define void @test_v2f32_v4f16(<4 x half>* %p, <2 x float>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <2 x float> + %4 = fadd <2 x float> %3, %3 + store <2 x float> %4, <2 x float>* %q + ret void +} + ; CHECK-LABEL: test_v2f32_v8i8: define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -448,6 +509,19 @@ define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) { ret void } +; CHECK-LABEL: test_v4i16_v4f16: +define void @test_v4i16_v4f16(<4 x half>* %p, <4 x i16>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <4 x i16> + %4 = add <4 x i16> %3, %3 + store <4 x i16> %4, <4 x i16>* %q + ret void +} + ; CHECK-LABEL: 
test_v4i16_v8i8: define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -461,6 +535,109 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) { ret void } +; CHECK-LABEL: test_v4f16_i64: +define void @test_v4f16_i64(i64* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load i64, i64* %p + %2 = add i64 %1, %1 + %3 = bitcast i64 %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_f64: +define void @test_v4f16_f64(double* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load double, double* %p + %2 = fadd double %1, %1 + %3 = bitcast double %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v1i64: +define void @test_v4f16_v1i64(<1 x i64>* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <1 x i64>, <1 x i64>* %p + %2 = add <1 x i64> %1, %1 + %3 = bitcast <1 x i64> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v2f32: +define void @test_v4f16_v2f32(<2 x float>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <2 x float>, <2 x float>* %p + %2 = fadd <2 x float> %1, %1 + %3 = bitcast <2 x float> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v2i32: +define void @test_v4f16_v2i32(<2 x i32>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { 
v{{[0-9]+}}.4h } + %1 = load <2 x i32>, <2 x i32>* %p + %2 = add <2 x i32> %1, %1 + %3 = bitcast <2 x i32> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v4i16: +define void @test_v4f16_v4i16(<4 x i16>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <4 x i16>, <4 x i16>* %p + %2 = add <4 x i16> %1, %1 + %3 = bitcast <4 x i16> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v8i8: +define void @test_v4f16_v8i8(<8 x i8>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.8b } +; CHECK: rev16 v{{[0-9]+}}.8b +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <8 x i8>, <8 x i8>* %p + %2 = add <8 x i8> %1, %1 + %3 = bitcast <8 x i8> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + ; CHECK-LABEL: test_v8i8_i64: define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) { ; CHECK: ldr @@ -567,8 +744,9 @@ define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) { ; CHECK-LABEL: test_f128_v4f32: define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4s ; CHECK: ext ; CHECK: str q @@ -648,8 +826,9 @@ define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) { ; CHECK-LABEL: test_v2f64_v4f32: define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4s ; CHECK: st1 { v{{[0-9]+}}.2d } %1 = load <4 x float>, <4 x float>* %p @@ -726,9 +905,11 @@ define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) { ; CHECK-LABEL: test_v2i64_v4f32: define void 
@test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: add ; CHECK: st1 { v{{[0-9]+}}.2d } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -782,8 +963,8 @@ define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) { ; CHECK: ldr q ; CHECK: rev64 v{{[0-9]+}}.4s ; CHECK: ext -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = bitcast fp128 %2 to <4 x float> @@ -796,8 +977,8 @@ define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) { define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.2d } ; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = bitcast <2 x double> %2 to <4 x float> @@ -810,8 +991,9 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) { define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.2d } ; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = bitcast <2 x i64> %2 to <4 x float> @@ -823,8 +1005,8 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) { ; CHECK-LABEL: test_v4f32_v4i32: define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4s } -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = bitcast <4 x i32> %2 to <4 x float> @@ -837,8 +1019,8 @@ 
define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) { define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8h } ; CHECK: rev32 v{{[0-9]+}}.8h -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <8 x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = bitcast <8 x i16> %2 to <4 x float> @@ -851,8 +1033,8 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) { define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.16b } ; CHECK: rev32 v{{[0-9]+}}.16b -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = bitcast <16 x i8> %2 to <4 x float> @@ -903,8 +1085,8 @@ define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) { ; CHECK-LABEL: test_v4i32_v4f32: define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -982,9 +1164,9 @@ define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) { ; CHECK-LABEL: test_v8i16_v4f32: define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } ; CHECK: rev32 v{{[0-9]+}}.8h +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.8h } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -1007,6 +1189,19 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) { ret void } +; CHECK-LABEL: test_v8i16_v8f16: +define void @test_v8i16_v8f16(<8 x half>* %p, <8 x i16>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.8h } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.8h } + %1 = load <8 x half>, <8 x half>* %p + %2 = fadd <8 x half> 
%1, %1 + %3 = bitcast <8 x half> %2 to <8 x i16> + %4 = add <8 x i16> %3, %3 + store <8 x i16> %4, <8 x i16>* %q + ret void +} + ; CHECK-LABEL: test_v8i16_v16i8: define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.16b } @@ -1062,9 +1257,9 @@ define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) { ; CHECK-LABEL: test_v16i8_v4f32: define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } ; CHECK: rev32 v{{[0-9]+}}.16b +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.16b } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -1087,6 +1282,20 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) { ret void } +; CHECK-LABEL: test_v16i8_v8f16: +define void @test_v16i8_v8f16(<8 x half>* %p, <16 x i8>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.8h } +; CHECK: rev16 v{{[0-9]+}}.16b +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.16b } + %1 = load <8 x half>, <8 x half>* %p + %2 = fadd <8 x half> %1, %1 + %3 = bitcast <8 x half> %2 to <16 x i8> + %4 = add <16 x i8> %3, %3 + store <16 x i8> %4, <16 x i8>* %q + ret void +} + ; CHECK-LABEL: test_v16i8_v8i16: define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8h } @@ -1099,3 +1308,16 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) { store <16 x i8> %4, <16 x i8>* %q ret void } + +; CHECK-LABEL: test_v4f16_struct: +%struct.struct1 = type { half, half, half, half } +define %struct.struct1 @test_v4f16_struct(%struct.struct1* %ret) { +entry: +; CHECK: ld1 { {{v[0-9]+}}.4h } +; CHECK-NOT: rev + %0 = bitcast %struct.struct1* %ret to <4 x half>* + %1 = load <4 x half>, <4 x half>* %0, align 2 + %2 = extractelement <4 x half> %1, i32 0 + %.fca.0.insert = insertvalue %struct.struct1 undef, half %2, 0 + ret %struct.struct1 %.fca.0.insert +} diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll 
b/test/CodeGen/AArch64/arm64-build-vector.ll index 9d3247350499..68dea215c8c4 100644 --- a/test/CodeGen/AArch64/arm64-build-vector.ll +++ b/test/CodeGen/AArch64/arm64-build-vector.ll @@ -1,23 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; Check that building up a vector w/ only one non-zero lane initializes -; intelligently. -define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind { -; CHECK-LABEL: one_lane: -; CHECK: dup.16b v[[REG:[0-9]+]], wzr -; CHECK-NEXT: mov.b v[[REG]][0], w1 -; v and q are aliases, and str is preferred against st.16b when possible -; rdar://11246289 -; CHECK: str q[[REG]], [x0] -; CHECK: ret - %conv = trunc i32 %skip0 to i8 - %vset_lane = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv, i32 0 - %tmp = bitcast i32* %out_int to <4 x i32>* - %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32> - store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16 - ret void -} - ; Check that building a vector from floats doesn't insert an unnecessary ; copy for lane zero. define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll index c7ba989d933e..eb3607dd437c 100644 --- a/test/CodeGen/AArch64/arm64-collect-loh.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -638,13 +638,13 @@ define void @setL(<1 x i8> %t) { ; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]: ; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] ; The tuple comes from the next instruction. 
-; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]] +; CHECK: ext.16b v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, #1 ; CHECK: ret ; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]] define void @uninterestingSub(i8* nocapture %row) #0 { %tmp = bitcast i8* %row to <16 x i8>* %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16 - %vext43 = shufflevector <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %tmp1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> + %vext43 = shufflevector <16 x i8> <i8 undef, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2>, <16 x i8> %tmp1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> %add.i.414 = add <16 x i8> zeroinitializer, %vext43 store <16 x i8> %add.i.414, <16 x i8>* %tmp, align 16 %add.ptr51 = getelementptr inbounds i8, i8* %row, i64 16 @@ -661,7 +661,7 @@ define void @uninterestingSub(i8* nocapture %row) #0 { @.str.89 = external unnamed_addr constant [12 x i8], align 1 @.str.90 = external unnamed_addr constant [5 x i8], align 1 ; CHECK-LABEL: test_r274582 -define void @test_r274582() { +define void @test_r274582(double %x) { entry: br i1 undef, label %if.then.i, label %if.end.i if.then.i: @@ -671,10 +671,10 @@ if.end.i: ; CHECK: .loh AdrpLdrGot ; CHECK: .loh AdrpAdrp ; CHECK: .loh AdrpLdr - %mul.i.i.i = fmul double undef, 1.000000e-06 - %add.i.i.i = fadd double undef, %mul.i.i.i - %sub.i.i = fsub double %add.i.i.i, undef - call void (i8*, ...) 
@callee(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.89, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.90, i64 0, i64 0), double %sub.i.i) + %mul = fmul double %x, 1.000000e-06 + %add = fadd double %mul, %mul + %sub = fsub double %add, %add + call void (i8*, ...) @callee(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.89, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.90, i64 0, i64 0), double %sub) unreachable } declare void @callee(i8* nocapture readonly, ...) diff --git a/test/CodeGen/AArch64/arm64-csel.ll b/test/CodeGen/AArch64/arm64-csel.ll index 3e246105f057..0f3b7746e5d1 100644 --- a/test/CodeGen/AArch64/arm64-csel.ll +++ b/test/CodeGen/AArch64/arm64-csel.ll @@ -79,6 +79,7 @@ define i32 @foo7(i32 %a, i32 %b) nounwind { entry: ; CHECK-LABEL: foo7: ; CHECK: sub +; FIXME: Misspelled CHECK-NEXT ; CHECK-next: adds ; CHECK-next: csneg ; CHECK-next: b diff --git a/test/CodeGen/AArch64/arm64-csldst-mmo.ll b/test/CodeGen/AArch64/arm64-csldst-mmo.ll index c69779add59b..40c82ecca3f2 100644 --- a/test/CodeGen/AArch64/arm64-csldst-mmo.ll +++ b/test/CodeGen/AArch64/arm64-csldst-mmo.ll @@ -10,8 +10,8 @@ ; ; CHECK: Before post-MI-sched: ; CHECK-LABEL: # Machine code for function test1: -; CHECK: SU(2): STRWui %wzr -; CHECK: SU(3): %x21, %x20 = LDPXi %sp +; CHECK: SU(2): STRWui $wzr +; CHECK: SU(3): $x21, $x20 = frame-destroy LDPXi $sp, 2 ; CHECK: Predecessors: ; CHECK-NEXT: SU(0): Out ; CHECK-NEXT: SU(0): Out diff --git a/test/CodeGen/AArch64/arm64-elf-constpool.ll b/test/CodeGen/AArch64/arm64-elf-constpool.ll index 95d334376b76..9f7a885f0087 100644 --- a/test/CodeGen/AArch64/arm64-elf-constpool.ll +++ b/test/CodeGen/AArch64/arm64-elf-constpool.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -O0 -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O0 -fast-isel -o - %s | FileCheck %s ; O0 checked for fastisel purposes. 
It has a separate path which ; creates a constpool entry for floating values. diff --git a/test/CodeGen/AArch64/arm64-elf-globals.ll b/test/CodeGen/AArch64/arm64-elf-globals.ll index 92dc8179f8ea..6cb72e2e3f4e 100644 --- a/test/CodeGen/AArch64/arm64-elf-globals.ll +++ b/test/CodeGen/AArch64/arm64-elf-globals.ll @@ -1,11 +1,11 @@ ; RUN: llc -mtriple=arm64-linux-gnu -o - %s -mcpu=cyclone | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 -fast-isel -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST ; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-PIC -; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC +; RUN: llc -mtriple=arm64-linux-gnu -O0 -fast-isel -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC ; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -o - %s -mcpu=cyclone | FileCheck %s -; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -o - %s -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -o - %s -O0 -fast-isel -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST ; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-PIC -; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -O0 -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC +; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -O0 -fast-isel -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC @var8 = external global i8, align 1 @var16 = external global i16, align 2 diff --git a/test/CodeGen/AArch64/arm64-ext.ll b/test/CodeGen/AArch64/arm64-ext.ll index 
584456e70393..d59d5821ebf3 100644 --- a/test/CodeGen/AArch64/arm64-ext.ll +++ b/test/CodeGen/AArch64/arm64-ext.ll @@ -94,19 +94,6 @@ define <8 x i16> @test_vextRq_undef2(<8 x i16>* %A) nounwind { ; Tests for ReconstructShuffle function. Indices have to be carefully ; chosen to reach lowering phase as a BUILD_VECTOR. -; One vector needs vext, the other can be handled by extract_subvector -; Also checks interleaving of sources is handled correctly. -; Essence: a vext is used on %A and something saner than stack load/store for final result. -define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: test_interleaved: -;CHECK: ext.8b -;CHECK: zip1.4h - %tmp1 = load <8 x i16>, <8 x i16>* %A - %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9> - ret <4 x i16> %tmp3 -} - ; An undef in the shuffle list should still be optimizable define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: test_undef: diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll index bdc24aea2144..256db180d911 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll @@ -1,5 +1,5 @@ ; This test should cause the TargetMaterializeAlloca to be invoked -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -disable-fp-elim < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -disable-fp-elim < %s | FileCheck %s %struct.S1Ty = type { i64 } %struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 55c9c6036ed5..87d6811f239e 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 
-fast-isel-abort=1 -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s define void @branch1() nounwind uwtable ssp { %x = alloca i32, align 4 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll index 59c4e38e5467..abbe6554a54e 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE -; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -fast-isel-sink-local-values -O0 -fast-isel -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -fast-isel-sink-local-values -O0 -fast-isel -fast-isel-abort=2 -code-model=large -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE +; RUN: llc -fast-isel-sink-local-values -O0 -fast-isel -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE define void @call0() nounwind { entry: @@ -80,15 +80,15 @@ define i32 @t2() { entry: ; CHECK-LABEL: t2 ; CHECK: mov [[REG1:x[0-9]+]], xzr +; CHECK: mov x0, [[REG1]] ; CHECK: orr w1, wzr, #0xfffffff8 ; CHECK: orr [[REG2:w[0-9]+]], wzr, #0x3ff -; CHECK: orr [[REG3:w[0-9]+]], wzr, #0x2 -; CHECK: mov [[REG4:w[0-9]+]], wzr -; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x1 -; CHECK: mov 
x0, [[REG1]] ; CHECK: uxth w2, [[REG2]] +; CHECK: orr [[REG3:w[0-9]+]], wzr, #0x2 ; CHECK: sxtb w3, [[REG3]] +; CHECK: mov [[REG4:w[0-9]+]], wzr ; CHECK: and w4, [[REG4]], #0x1 +; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x1 ; CHECK: and w5, [[REG5]], #0x1 ; CHECK: bl _func2 %call = call i32 @func2(i64 zeroext 0, i32 signext -8, i16 zeroext 1023, i8 signext -254, i1 zeroext 0, i1 zeroext 1) diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 16a02de79a91..b3e649c3fc33 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s ; Test fptosi define i32 @fptosi_wh(half %a) nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index 1b6886523311..7b208cceb5b2 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s ;; Test various conversions. 
define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll index c77949f996c3..51ec377ccaf4 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define zeroext i1 @fcmp_float1(float %a) { ; CHECK-LABEL: fcmp_float1 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll index 85d000b8606b..63854b97dea5 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -fast-isel-sink-local-values -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s ; Test load/store of global value from global offset table. 
@seed = common global i64 0, align 8 @@ -18,10 +18,10 @@ entry: ; CHECK: @Rand ; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE ; CHECK: ldr [[REG2:x[0-9]+]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}} -; CHECK: mov [[REG3:x[0-9]+]], #13849 -; CHECK: mov [[REG4:x[0-9]+]], #1309 ; CHECK: ldr [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}} +; CHECK: mov [[REG4:x[0-9]+]], #1309 ; CHECK: mul [[REG6:x[0-9]+]], [[REG5]], [[REG4]] +; CHECK: mov [[REG3:x[0-9]+]], #13849 ; CHECK: add [[REG7:x[0-9]+]], [[REG6]], [[REG3]] ; CHECK: and [[REG8:x[0-9]+]], [[REG7]], #0xffff ; CHECK: str [[REG8]], {{\[}}[[REG1]]{{\]}} diff --git a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll index 4bc02ebdd3e1..4288aa1df444 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define i32 @icmp_eq_imm(i32 %a) nounwind ssp { entry: diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll index a8f30ad4777d..0fcd4fe752f2 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64 +; RUN: llc -fast-isel-sink-local-values -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64 @message = global [80 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 16 @temp = common 
global [80 x i8] zeroinitializer, align 16 @@ -8,14 +8,14 @@ define void @t1() { ; ARM64: adrp x8, _message@PAGE ; ARM64: add x0, x8, _message@PAGEOFF ; ARM64: mov w9, wzr -; ARM64: mov x2, #80 ; ARM64: uxtb w1, w9 +; ARM64: mov x2, #80 ; ARM64: bl _memset - call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) define void @t2() { ; ARM64-LABEL: t2 @@ -25,11 +25,11 @@ define void @t2() { ; ARM64: add x1, x8, _message@PAGEOFF ; ARM64: mov x2, #80 ; ARM64: bl _memcpy - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) define void @t3() { ; ARM64-LABEL: t3 @@ -39,11 +39,11 @@ define void @t3() { ; ARM64: add x1, x8, _message@PAGEOFF ; ARM64: mov x2, #20 ; ARM64: bl _memmove - call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x 
i8]* @message, i32 0, i32 0), i64 20, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) define void @t4() { ; ARM64-LABEL: t4 @@ -58,7 +58,7 @@ define void @t4() { ; ARM64: ldrb w11, [x9, #16] ; ARM64: strb w11, [x8, #16] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false) ret void } @@ -75,7 +75,7 @@ define void @t5() { ; ARM64: ldrb w11, [x9, #16] ; ARM64: strb w11, [x8, #16] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false) ret void } @@ -92,7 +92,7 @@ define void @t6() { ; ARM64: ldrb w10, [x9, #8] ; ARM64: strb w10, [x8, #8] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i1 false) ret void } @@ -111,7 +111,7 @@ define void @t7() { ; ARM64: ldrb w10, [x9, #6] ; ARM64: strb 
w10, [x8, #6] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i1 false) ret void } @@ -130,7 +130,7 @@ define void @t8() { ; ARM64: ldrb w10, [x9, #3] ; ARM64: strb w10, [x8, #3] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i1 false) ret void } @@ -143,6 +143,6 @@ define void @test_distant_memcpy(i8* %dst) { ; ARM64: strb [[BYTE]], [x0] %array = alloca i8, i32 8192 %elem = getelementptr i8, i8* %array, i32 8000 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i1 false) ret void } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll index b5a08c148930..234731cfa242 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s ; Materialize using fmov define float @fmov_float1() { diff --git a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll 
b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll index 81daa7c1d5ac..d9997f916955 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios < %s | FileCheck %s ; Fast-isel can't do vector conversions yet, but it was emitting some highly ; suspect UCVTFUWDri MachineInstrs. diff --git a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll index c26bfa8bcfeb..635e6b92542a 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc %s -O0 -fast-isel-abort=1 -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc %s -O0 -fast-isel -fast-isel-abort=1 -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t ; RUN: FileCheck %s < %t --check-prefix=CHECK-SSA ; CHECK-SSA-LABEL: Machine code for function t1 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll index 1f6a60e77cc3..9a67fff00ac3 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s ;; Test returns. 
define void @t0() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll index 9f83a9c359a2..508e36750eec 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -fast-isel-sink-local-values -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define void @t0(i32 %a) nounwind { entry: @@ -95,6 +95,8 @@ declare void @llvm.trap() nounwind define void @ands(i32* %addr) { ; CHECK-LABEL: ands: ; CHECK: tst [[COND:w[0-9]+]], #0x1 +; CHECK-NEXT: orr w{{[0-9]+}}, wzr, #0x2 +; CHECK-NEXT: orr w{{[0-9]+}}, wzr, #0x1 ; CHECK-NEXT: csel [[COND]], entry: %cond91 = select i1 undef, i32 1, i32 2 diff --git a/test/CodeGen/AArch64/arm64-fold-lsl.ll b/test/CodeGen/AArch64/arm64-fold-lsl.ll index 57ef7d736730..0790e4c58c46 100644 --- a/test/CodeGen/AArch64/arm64-fold-lsl.ll +++ b/test/CodeGen/AArch64/arm64-fold-lsl.ll @@ -77,3 +77,309 @@ define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val) nounwind { store i64 %val, i64* %arrayidx86, align 8 ret void } + +; Check that we combine a shift into the offset instead of using a narrower load +; when we have a load followed by a trunc + +define i32 @load_doubleword_trunc_word(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_word: +; CHECK: ldr x0, [x0, x1, lsl #3] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i32 + ret i32 %trunc +} + +define i16 @load_doubleword_trunc_halfword(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_halfword: +; CHECK: ldr x0, [x0, x1, lsl #3] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i16 + ret i16 %trunc +} + +define i8 
@load_doubleword_trunc_byte(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_byte: +; CHECK: ldr x0, [x0, x1, lsl #3] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i8 + ret i8 %trunc +} + +define i16 @load_word_trunc_halfword(i32* %ptr, i64 %off) { +entry: +; CHECK-LABEL: load_word_trunc_halfword: +; CHECK: ldr w0, [x0, x1, lsl #2] + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i16 + ret i16 %trunc +} + +define i8 @load_word_trunc_byte(i32* %ptr, i64 %off) { +; CHECK-LABEL: load_word_trunc_byte: +; CHECK: ldr w0, [x0, x1, lsl #2] +entry: + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i8 + ret i8 %trunc +} + +define i8 @load_halfword_trunc_byte(i16* %ptr, i64 %off) { +; CHECK-LABEL: load_halfword_trunc_byte: +; CHECK: ldrh w0, [x0, x1, lsl #1] +entry: + %idx = getelementptr inbounds i16, i16* %ptr, i64 %off + %x = load i16, i16* %idx, align 8 + %trunc = trunc i16 %x to i8 + ret i8 %trunc +} + +; Check that we do use a narrower load, and so don't combine the shift, when +; the loaded value is zero-extended. 
+ +define i64 @load_doubleword_trunc_word_zext(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_word_zext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #3 +; CHECK: ldr w0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i32 + %ext = zext i32 %trunc to i64 + ret i64 %ext +} + +define i64 @load_doubleword_trunc_halfword_zext(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_halfword_zext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #3 +; CHECK: ldrh w0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i16 + %ext = zext i16 %trunc to i64 + ret i64 %ext +} + +define i64 @load_doubleword_trunc_byte_zext(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_byte_zext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #3 +; CHECK: ldrb w0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i8 + %ext = zext i8 %trunc to i64 + ret i64 %ext +} + +define i64 @load_word_trunc_halfword_zext(i32* %ptr, i64 %off) { +; CHECK-LABEL: load_word_trunc_halfword_zext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #2 +; CHECK: ldrh w0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i16 + %ext = zext i16 %trunc to i64 + ret i64 %ext +} + +define i64 @load_word_trunc_byte_zext(i32* %ptr, i64 %off) { +; CHECK-LABEL: load_word_trunc_byte_zext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #2 +; CHECK: ldrb w0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i8 + %ext = zext i8 %trunc to i64 + ret i64 %ext +} + +define i64 @load_halfword_trunc_byte_zext(i16* %ptr, i64 %off) { +; CHECK-LABEL: load_halfword_trunc_byte_zext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #1 
+; CHECK: ldrb w0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i16, i16* %ptr, i64 %off + %x = load i16, i16* %idx, align 8 + %trunc = trunc i16 %x to i8 + %ext = zext i8 %trunc to i64 + ret i64 %ext +} + +; Check that we do use a narrower load, and so don't combine the shift, when +; the loaded value is sign-extended. + +define i64 @load_doubleword_trunc_word_sext(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_word_sext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #3 +; CHECK: ldrsw x0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i32 + %ext = sext i32 %trunc to i64 + ret i64 %ext +} + +define i64 @load_doubleword_trunc_halfword_sext(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_halfword_sext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #3 +; CHECK: ldrsh x0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i16 + %ext = sext i16 %trunc to i64 + ret i64 %ext +} + +define i64 @load_doubleword_trunc_byte_sext(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_byte_sext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #3 +; CHECK: ldrsb x0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i8 + %ext = sext i8 %trunc to i64 + ret i64 %ext +} + +define i64 @load_word_trunc_halfword_sext(i32* %ptr, i64 %off) { +; CHECK-LABEL: load_word_trunc_halfword_sext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #2 +; CHECK: ldrsh x0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i16 + %ext = sext i16 %trunc to i64 + ret i64 %ext +} + +define i64 @load_word_trunc_byte_sext(i32* %ptr, i64 %off) { +; CHECK-LABEL: load_word_trunc_byte_sext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #2 +; CHECK: ldrsb x0, [x0, 
[[REG]]] +entry: + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i8 + %ext = sext i8 %trunc to i64 + ret i64 %ext +} + +define i64 @load_halfword_trunc_byte_sext(i16* %ptr, i64 %off) { +; CHECK-LABEL: load_halfword_trunc_byte_sext: +; CHECK: lsl [[REG:x[0-9]+]], x1, #1 +; CHECK: ldrsb x0, [x0, [[REG]]] +entry: + %idx = getelementptr inbounds i16, i16* %ptr, i64 %off + %x = load i16, i16* %idx, align 8 + %trunc = trunc i16 %x to i8 + %ext = sext i8 %trunc to i64 + ret i64 %ext +} + +; Check that we don't combine the shift, and so will use a narrower load, when +; the shift is used more than once. + +define i32 @load_doubleword_trunc_word_reuse_shift(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_word_reuse_shift: +; CHECK: lsl x[[REG1:[0-9]+]], x1, #3 +; CHECK: ldr w[[REG2:[0-9]+]], [x0, x[[REG1]]] +; CHECK: add w0, w[[REG2]], w[[REG1]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i32 + %lsl = shl i64 %off, 3 + %lsl.trunc = trunc i64 %lsl to i32 + %add = add i32 %trunc, %lsl.trunc + ret i32 %add +} + +define i16 @load_doubleword_trunc_halfword_reuse_shift(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_halfword_reuse_shift: +; CHECK: lsl x[[REG1:[0-9]+]], x1, #3 +; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]] +; CHECK: add w0, w[[REG2]], w[[REG1]] +entry: + %idx = getelementptr inbounds i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i16 + %lsl = shl i64 %off, 3 + %lsl.trunc = trunc i64 %lsl to i16 + %add = add i16 %trunc, %lsl.trunc + ret i16 %add +} + +define i8 @load_doubleword_trunc_byte_reuse_shift(i64* %ptr, i64 %off) { +; CHECK-LABEL: load_doubleword_trunc_byte_reuse_shift: +; CHECK: lsl x[[REG1:[0-9]+]], x1, #3 +; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]] +; CHECK: add w0, w[[REG2]], w[[REG1]] +entry: + %idx = getelementptr inbounds
i64, i64* %ptr, i64 %off + %x = load i64, i64* %idx, align 8 + %trunc = trunc i64 %x to i8 + %lsl = shl i64 %off, 3 + %lsl.trunc = trunc i64 %lsl to i8 + %add = add i8 %trunc, %lsl.trunc + ret i8 %add +} + +define i16 @load_word_trunc_halfword_reuse_shift(i32* %ptr, i64 %off) { +entry: +; CHECK-LABEL: load_word_trunc_halfword_reuse_shift: +; CHECK: lsl x[[REG1:[0-9]+]], x1, #2 +; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]] +; CHECK: add w0, w[[REG2]], w[[REG1]] + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i16 + %lsl = shl i64 %off, 2 + %lsl.trunc = trunc i64 %lsl to i16 + %add = add i16 %trunc, %lsl.trunc + ret i16 %add +} + +define i8 @load_word_trunc_byte_reuse_shift(i32* %ptr, i64 %off) { +; CHECK-LABEL: load_word_trunc_byte_reuse_shift: +; CHECK: lsl x[[REG1:[0-9]+]], x1, #2 +; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]] +; CHECK: add w0, w[[REG2]], w[[REG1]] +entry: + %idx = getelementptr inbounds i32, i32* %ptr, i64 %off + %x = load i32, i32* %idx, align 8 + %trunc = trunc i32 %x to i8 + %lsl = shl i64 %off, 2 + %lsl.trunc = trunc i64 %lsl to i8 + %add = add i8 %trunc, %lsl.trunc + ret i8 %add +} + +define i8 @load_halfword_trunc_byte_reuse_shift(i16* %ptr, i64 %off) { +; CHECK-LABEL: load_halfword_trunc_byte_reuse_shift: +; CHECK: lsl x[[REG1:[0-9]+]], x1, #1 +; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]] +; CHECK: add w0, w[[REG2]], w[[REG1]] +entry: + %idx = getelementptr inbounds i16, i16* %ptr, i64 %off + %x = load i16, i16* %idx, align 8 + %trunc = trunc i16 %x to i8 + %lsl = shl i64 %off, 1 + %lsl.trunc = trunc i64 %lsl to i8 + %add = add i8 %trunc, %lsl.trunc + ret i8 %add +} diff --git a/test/CodeGen/AArch64/arm64-fpcr.ll b/test/CodeGen/AArch64/arm64-fpcr.ll new file mode 100644 index 000000000000..12ee139a7aa2 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-fpcr.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +define i64 @GetFpcr() { +; 
CHECK-LABEL: GetFpcr +; CHECK: mrs x0, FPCR +; CHECK: ret + %1 = tail call i64 @llvm.aarch64.get.fpcr() + ret i64 %1 +} + +declare i64 @llvm.aarch64.get.fpcr() #0 + +define i32 @GetFltRounds() { +; CHECK-LABEL: GetFltRounds +; CHECK: mrs x8, FPCR +; CHECK: add w8, w8, #1024, lsl #12 +; CHECK: ubfx w0, w8, #22, #2 +; CHECK: ret + %1 = tail call i32 @llvm.flt.rounds() + ret i32 %1 +} + +declare i32 @llvm.flt.rounds() #0 diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll index 14beb1ae9c36..1032a6d620ba 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll @@ -28,6 +28,28 @@ return: ; preds = %if.then172, %cond.e ret void } +; Avoid an assert/bad codegen in LD1LANEPOST lowering by not forming +; LD1LANEPOST ISD nodes with a non-constant lane index. +define <4 x i32> @f2(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2, i32 %idx) { + %L0 = load i32, i32* %p + %p1 = getelementptr i32, i32* %p, i64 1 + %L1 = load i32, i32* %p1 + %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2 + %vret = insertelement <4 x i32> %v, i32 %L0, i32 %idx + store i32 %L1, i32 *%p + ret <4 x i32> %vret +} + +; Check that a cycle is avoided during isel between the LD1LANEPOST instruction and the load of %L1. 
+define <4 x i32> @f3(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2) { + %L0 = load i32, i32* %p + %p1 = getelementptr i32, i32* %p, i64 1 + %L1 = load i32, i32* %p1 + %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2 + %vret = insertelement <4 x i32> %v, i32 %L0, i32 %L1 + ret <4 x i32> %vret +} + ; Function Attrs: nounwind readnone declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1 diff --git a/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/test/CodeGen/AArch64/arm64-ldp-cluster.ll index 75b02b9d9134..80b67770665f 100644 --- a/test/CodeGen/AArch64/arm64-ldp-cluster.ll +++ b/test/CodeGen/AArch64/arm64-ldp-cluster.ll @@ -1,6 +1,7 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOS %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOSM1 %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m3 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; Test ldr clustering. 
; CHECK: ********** MI Scheduling ********** @@ -8,11 +9,11 @@ ; CHECK: Cluster ld/st SU(1) - SU(2) ; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDRWui ; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDRWui -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldr_int:%bb.0 -; EXYNOS: Cluster ld/st SU(1) - SU(2) -; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDRWui -; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDRWui +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldr_int:%bb.0 +; EXYNOSM1: Cluster ld/st SU(1) - SU(2) +; EXYNOSM1: SU(1): %{{[0-9]+}}:gpr32 = LDRWui +; EXYNOSM1: SU(2): %{{[0-9]+}}:gpr32 = LDRWui define i32 @ldr_int(i32* %a) nounwind { %p1 = getelementptr inbounds i32, i32* %a, i32 1 %tmp1 = load i32, i32* %p1, align 2 @@ -28,11 +29,11 @@ define i32 @ldr_int(i32* %a) nounwind { ; CHECK: Cluster ld/st SU(1) - SU(2) ; CHECK: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui ; CHECK: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldp_sext_int:%bb.0 -; EXYNOS: Cluster ld/st SU(1) - SU(2) -; EXYNOS: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui -; EXYNOS: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldp_sext_int:%bb.0 +; EXYNOSM1: Cluster ld/st SU(1) - SU(2) +; EXYNOSM1: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui +; EXYNOSM1: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui define i64 @ldp_sext_int(i32* %p) nounwind { %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 @@ -49,11 +50,11 @@ define i64 @ldp_sext_int(i32* %p) nounwind { ; CHECK: Cluster ld/st SU(2) - SU(1) ; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDURWi ; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDURWi -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldur_int:%bb.0 -; EXYNOS: Cluster ld/st SU(2) - SU(1) -; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDURWi -; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDURWi +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldur_int:%bb.0 +; EXYNOSM1: Cluster ld/st 
SU(2) - SU(1) +; EXYNOSM1: SU(1): %{{[0-9]+}}:gpr32 = LDURWi +; EXYNOSM1: SU(2): %{{[0-9]+}}:gpr32 = LDURWi define i32 @ldur_int(i32* %a) nounwind { %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 @@ -69,11 +70,11 @@ define i32 @ldur_int(i32* %a) nounwind { ; CHECK: Cluster ld/st SU(3) - SU(4) ; CHECK: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui ; CHECK: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldp_half_sext_zext_int:%bb.0 -; EXYNOS: Cluster ld/st SU(3) - SU(4) -; EXYNOS: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui -; EXYNOS: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldp_half_sext_zext_int:%bb.0 +; EXYNOSM1: Cluster ld/st SU(3) - SU(4) +; EXYNOSM1: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui +; EXYNOSM1: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { %tmp0 = load i64, i64* %q, align 4 %tmp = load i32, i32* %p, align 4 @@ -92,11 +93,11 @@ define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { ; CHECK: Cluster ld/st SU(3) - SU(4) ; CHECK: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui ; CHECK: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldp_half_zext_sext_int:%bb.0 -; EXYNOS: Cluster ld/st SU(3) - SU(4) -; EXYNOS: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui -; EXYNOS: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldp_half_zext_sext_int:%bb.0 +; EXYNOSM1: Cluster ld/st SU(3) - SU(4) +; EXYNOSM1: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; EXYNOSM1: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { %tmp0 = load i64, i64* %q, align 4 %tmp = load i32, i32* %p, align 4 @@ -115,11 +116,11 @@ define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { ; CHECK-NOT: Cluster ld/st ; 
CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDRWui ; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDRWui -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldr_int_volatile:%bb.0 -; EXYNOS-NOT: Cluster ld/st -; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDRWui -; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDRWui +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldr_int_volatile:%bb.0 +; EXYNOSM1-NOT: Cluster ld/st +; EXYNOSM1: SU(1): %{{[0-9]+}}:gpr32 = LDRWui +; EXYNOSM1: SU(2): %{{[0-9]+}}:gpr32 = LDRWui define i32 @ldr_int_volatile(i32* %a) nounwind { %p1 = getelementptr inbounds i32, i32* %a, i32 1 %tmp1 = load volatile i32, i32* %p1, align 2 @@ -135,9 +136,9 @@ define i32 @ldr_int_volatile(i32* %a) nounwind { ; CHECK: Cluster ld/st SU(1) - SU(3) ; CHECK: SU(1): %{{[0-9]+}}:fpr128 = LDRQui ; CHECK: SU(3): %{{[0-9]+}}:fpr128 = LDRQui -; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldq_cluster:%bb.0 -; EXYNOS-NOT: Cluster ld/st +; EXYNOSM1: ********** MI Scheduling ********** +; EXYNOSM1-LABEL: ldq_cluster:%bb.0 +; EXYNOSM1-NOT: Cluster ld/st define <2 x i64> @ldq_cluster(i64* %p) { %a1 = bitcast i64* %p to <2 x i64>* %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8 diff --git a/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir b/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir index dacaf4966d07..c7ae3fc97544 100644 --- a/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir +++ b/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir @@ -1,115 +1,115 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s --- # CHECK-LABEL: name: test_LDURSi_post -# CHECK: LDRSpost %x0, -4 +# CHECK: LDRSpost $x0, -4 name: test_LDURSi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %s0 = LDURSi %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $s0 = LDURSi $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... 
# CHECK-LABEL: name: test_LDURDi_post -# CHECK: LDRDpost %x0, -4 +# CHECK: LDRDpost $x0, -4 name: test_LDURDi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %d0 = LDURDi %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $d0 = LDURDi $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_LDURQi_post -# CHECK: LDRQpost %x0, -4 +# CHECK: LDRQpost $x0, -4 name: test_LDURQi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %q0 = LDURQi %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $q0 = LDURQi $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_LDURWi_post -# CHECK: LDRWpost %x0, -4 +# CHECK: LDRWpost $x0, -4 name: test_LDURWi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %w1 = LDURWi %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $w1 = LDURWi $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_LDURXi_post -# CHECK: %x1 = LDRXpost %x0, -4 +# CHECK: $x1 = LDRXpost $x0, -4 name: test_LDURXi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %x1 = LDURXi %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $x1 = LDURXi $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_STURSi_post -# CHECK: STRSpost %s0, %x0, -4 +# CHECK: STRSpost $s0, $x0, -4 name: test_STURSi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %s0 = FMOVS0 - STURSi %s0, %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $s0 = FMOVS0 + STURSi $s0, $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... 
# CHECK-LABEL: name: test_STURDi_post -# CHECK: STRDpost %d0, %x0, -4 +# CHECK: STRDpost $d0, $x0, -4 name: test_STURDi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %d0 = FMOVD0 - STURDi %d0, %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $d0 = FMOVD0 + STURDi $d0, $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_STURQi_post -# CHECK: STRQpost %q0, %x0, -4 +# CHECK: STRQpost $q0, $x0, -4 name: test_STURQi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - %q0 = MOVIv4i32 0, 0 - STURQi %q0, %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + $q0 = MOVIv4i32 0, 0 + STURQi $q0, $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_STURWi_post -# CHECK: STRWpost %wzr, %x0, -4 +# CHECK: STRWpost $wzr, $x0, -4 name: test_STURWi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - STURWi %wzr, %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + STURWi $wzr, $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... # CHECK-LABEL: name: test_STURXi_post -# CHECK: STRXpost %xzr, %x0, -4 +# CHECK: STRXpost $xzr, $x0, -4 name: test_STURXi_post body: | bb.0.entry: - liveins: %x0 + liveins: $x0 - STURXi %xzr, %x0, 0 - %x0 = SUBXri %x0, 4, 0 - RET_ReallyLR implicit %x0 + STURXi $xzr, $x0, 0 + $x0 = SUBXri $x0, 4, 0 + RET_ReallyLR implicit $x0 ... 
diff --git a/test/CodeGen/AArch64/arm64-long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll index cc4defefa328..39436d6dd34d 100644 --- a/test/CodeGen/AArch64/arm64-long-shift.ll +++ b/test/CodeGen/AArch64/arm64-long-shift.ll @@ -2,19 +2,17 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: shl: -; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 -; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2 ; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]] ; CHECK: cmp x2, #0 ; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq ; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2 ; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]] +; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, x2 ; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 -; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]] ; CHECK: cmp [[EXTRA_SHIFT]], #0 ; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge -; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2 -; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge +; CHECK: csel x0, xzr, [[HI_BIG_SHIFT]], ge ; CHECK: ret %shl = shl i128 %r, %s @@ -23,20 +21,18 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { define i128 @ashr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: ashr: -; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 -; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2 ; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]] ; CHECK: cmp x2, #0 ; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq ; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2 ; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]] +; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2 ; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 -; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]] ; CHECK: cmp [[EXTRA_SHIFT]], #0 ; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge -; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 ; CHECK: asr 
[[BIGSHIFT_HI:x[0-9]+]], x1, #63 -; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge +; CHECK: csel x1, [[BIGSHIFT_HI]], [[LO_BIG_SHIFT]], ge ; CHECK: ret %shr = ashr i128 %r, %s @@ -45,19 +41,16 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone { define i128 @lshr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: lshr: -; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 -; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2 ; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]] ; CHECK: cmp x2, #0 ; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq ; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2 ; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]] -; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 -; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]] +; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2 ; CHECK: cmp [[EXTRA_SHIFT]], #0 ; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge -; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 -; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge +; CHECK: csel x1, xzr, [[LO_BIG_SHIFT]], ge ; CHECK: ret %shr = lshr i128 %r, %s diff --git a/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll index 0590031fbcdc..951076c10b84 100644 --- a/test/CodeGen/AArch64/arm64-memcpy-inline.ll +++ b/test/CodeGen/AArch64/arm64-memcpy-inline.ll @@ -22,18 +22,18 @@ entry: ; CHECK: strh [[REG1]], [x[[BASEREG2]], #8] ; CHECK: ldr [[REG2:x[0-9]+]], ; CHECK: str [[REG2]], - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) ret i32 0 } define void @t1(i8* nocapture %C) 
nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15] -; CHECK: stur [[DEST]], [x0, #15] ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] -; CHECK: str [[DEST]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) +; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15] +; CHECK: stur [[DEST:q[0-9]+]], [x0, #15] +; CHECK: str [[DEST:q[0-9]+]], [x0] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -45,18 +45,18 @@ entry: ; CHECK: str [[REG3]], [x0, #32] ; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}] ; CHECK: stp [[DEST1]], [[DEST2]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false) ret void } define void @t3(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t3: +; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] ; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16] ; CHECK: str [[REG4]], [x0, #16] -; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] ; CHECK: str [[DEST]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } @@ -67,7 +67,7 @@ entry: ; CHECK: strh [[REG5]], [x0, #16] ; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}] ; CHECK: str [[REG6]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) + tail 
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false) ret void } @@ -80,7 +80,7 @@ entry: ; CHECK: mov [[REG8:w[0-9]+]], ; CHECK: movk [[REG8]], ; CHECK: str [[REG8]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false) ret void } @@ -91,7 +91,7 @@ entry: ; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6] ; CHECK: ldr ; CHECK: str - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false) ret void } @@ -104,9 +104,9 @@ entry: ; CHECK: str [[REG10]], [x0] %0 = bitcast %struct.Foo* %a to i8* %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll index 8c872cc61500..ecdfcc6673aa 100644 --- a/test/CodeGen/AArch64/arm64-memset-inline.ll +++ 
b/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -5,7 +5,7 @@ entry: ; CHECK-LABEL: t1: ; CHECK: str wzr, [x0, #8] ; CHECK: str xzr, [x0] - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } @@ -17,11 +17,11 @@ entry: ; CHECK: str xzr, [sp, #8] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false) call void @something(i8* %0) nounwind ret void } declare void @something(i8*) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll index 3466e1bace56..0afe1c006b0f 100644 --- a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll +++ b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll @@ -1,28 +1,28 @@ ; RUN: llc %s -mtriple=arm64-apple-darwin -o - | \ -; RUN: FileCheck --check-prefix=CHECK-DARWIN --check-prefix=CHECK %s +; RUN: FileCheck --check-prefixes=CHECK,CHECK-DARWIN %s ; RUN: llc %s -mtriple=arm64-linux-gnu -o - | \ -; RUN: FileCheck --check-prefix=CHECK-LINUX --check-prefix=CHECK %s +; RUN: FileCheck --check-prefixes=CHECK,CHECK-LINUX %s ; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset() ; CHECK-LABEL: fct1: ; For small size (<= 256), we do not change memset to bzero. 
; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct1(i8* nocapture %ptr) { +define void @fct1(i8* nocapture %ptr) minsize { entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) ; CHECK-LABEL: fct2: ; When the size is bigger than 256, change into bzero. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct2(i8* nocapture %ptr) { +define void @fct2(i8* nocapture %ptr) minsize { entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false) ret void } @@ -30,10 +30,10 @@ entry: ; For unknown size, change to bzero. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct3(i8* nocapture %ptr, i32 %unknown) { +define void @fct3(i8* nocapture %ptr, i32 %unknown) minsize { entry: %conv = sext i32 %unknown to i64 - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false) ret void } @@ -41,7 +41,7 @@ entry: ; Size <= 256, no change. ; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct4(i8* %ptr) { +define void @fct4(i8* %ptr) minsize { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp) @@ -56,7 +56,7 @@ declare i64 @llvm.objectsize.i64(i8*, i1) ; Size > 256, change. 
; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct5(i8* %ptr) { +define void @fct5(i8* %ptr) minsize { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp) @@ -67,7 +67,7 @@ entry: ; Size = unknown, change. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct6(i8* %ptr, i32 %unknown) { +define void @fct6(i8* %ptr, i32 %unknown) minsize { entry: %conv = sext i32 %unknown to i64 %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) @@ -82,7 +82,7 @@ entry: ; memset with something that is not a zero, no change. ; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct7(i8* %ptr) { +define void @fct7(i8* %ptr) minsize { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp) @@ -93,7 +93,7 @@ entry: ; memset with something that is not a zero, no change. ; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct8(i8* %ptr) { +define void @fct8(i8* %ptr) minsize { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp) @@ -104,7 +104,7 @@ entry: ; memset with something that is not a zero, no change. 
; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct9(i8* %ptr, i32 %unknown) { +define void @fct9(i8* %ptr, i32 %unknown) minsize { entry: %conv = sext i32 %unknown to i64 %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll index 85572f2cf0f8..8216e3d8e5ba 100644 --- a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll +++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll @@ -1,14 +1,42 @@ ; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s -; Small (16-bytes here) unaligned memcpys should stay memcpy calls if +; Small (16 bytes here) unaligned memcpy() should be a function call if ; strict-alignment is turned on. define void @t0(i8* %out, i8* %in) { ; CHECK-LABEL: t0: -; CHECK: orr w2, wzr, #0x10 -; CHECK-NEXT: bl _memcpy +; CHECK: orr w2, wzr, #0x10 +; CHECK-NEXT: bl _memcpy entry: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +; Small (16 bytes here) aligned memcpy() should be inlined even if +; strict-alignment is turned on. +define void @t1(i8* align 8 %out, i8* align 8 %in) { +; CHECK-LABEL: t1: +; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x1] +; CHECK-NEXT: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0] +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %out, i8* align 8 %in, i64 16, i1 false) + ret void +} + +; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized +; loads and stores if strict-alignment is turned on. 
+define void @t2(i8* %out, i8* %in) { +; CHECK-LABEL: t2: +; CHECK: ldrb w{{[0-9]+}}, [x1, #3] +; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1, #2] +; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1, #1] +; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1] +; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #3] +; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #2] +; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #1] +; CHECK-NEXT: strb w{{[0-9]+}}, [x0] +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll index 07df9cb32dba..f0b9ccc8b5d1 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll @@ -26,9 +26,9 @@ entry: %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -105,7 +105,7 @@ define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll index 711d2f7397b0..c2f53e88a95a 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll @@ -32,9 +32,9 @@ entry: %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -106,7 +106,7 @@ for.end: ; preds = %for.cond ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll index bbb699bbb468..8af6b8220470 100644 --- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll @@ -6,7 +6,7 @@ ; ; CHECK: ********** MI Scheduling 
********** ; CHECK: shiftable -; CHECK: SU(2): %2:gpr64common = SUBXri %1, 20, 0 +; CHECK: SU(2): %2:gpr64common = SUBXri %1:gpr64common, 20, 0 ; CHECK: Successors: ; CHECK-NEXT: SU(4): Data Latency=1 Reg=%2 ; CHECK-NEXT: SU(3): Data Latency=2 Reg=%2 diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll index 36de403a0c8f..6d81d9acd861 100644 --- a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll +++ b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll @@ -5,15 +5,15 @@ ; ; CHECK: ********** MI Scheduling ********** ; CHECK: misched_bug:%bb.0 entry -; CHECK: SU(2): %2:gpr32 = LDRWui %0, 1; mem:LD4[%ptr1_plus1] GPR32:%2 GPR64common:%0 +; CHECK: SU(2): %2:gpr32 = LDRWui %0:gpr64common, 1 :: (load 4 from %ir.ptr1_plus1) ; CHECK: Successors: ; CHECK-NEXT: SU(5): Data Latency=4 Reg=%2 ; CHECK-NEXT: SU(4): Ord Latency=0 -; CHECK: SU(3): STRWui %wzr, %0, 0; mem:ST4[%ptr1] GPR64common:%0 +; CHECK: SU(3): STRWui $wzr, %0:gpr64common, 0 :: (store 4 into %ir.ptr1) ; CHECK: Successors: ; CHECK: SU(4): Ord Latency=0 -; CHECK: SU(4): STRWui %wzr, %1, 0; mem:ST4[%ptr2] GPR64common:%1 -; CHECK: SU(5): %w0 = COPY %2; GPR32:%2 +; CHECK: SU(4): STRWui $wzr, %1:gpr64common, 0 :: (store 4 into %ir.ptr2) +; CHECK: SU(5): $w0 = COPY %2 ; CHECK: ** ScheduleDAGMI::schedule picking next node define i32 @misched_bug(i32* %ptr1, i32* %ptr2) { entry: diff --git a/test/CodeGen/AArch64/arm64-misched-multimmo.ll b/test/CodeGen/AArch64/arm64-misched-multimmo.ll index 47f2ec790c7a..c35c72f24b09 100644 --- a/test/CodeGen/AArch64/arm64-misched-multimmo.ll +++ b/test/CodeGen/AArch64/arm64-misched-multimmo.ll @@ -8,11 +8,11 @@ ; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling. 
; ; CHECK-LABEL: # Machine code for function foo: -; CHECK: SU(2): renamable %w{{[0-9]+}}, renamable %w{{[0-9]+}} = LDPWi +; CHECK: SU(2): renamable $w{{[0-9]+}}, renamable $w{{[0-9]+}} = LDPWi ; CHECK: Successors: ; CHECK-NOT: ch SU(4) ; CHECK: SU(3) -; CHECK: SU(4): STRWui %wzr, renamable %x{{[0-9]+}} +; CHECK: SU(4): STRWui $wzr, renamable $x{{[0-9]+}} define i32 @foo() { entry: %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G2, i64 0, i64 0), align 4 diff --git a/test/CodeGen/AArch64/arm64-movi.ll b/test/CodeGen/AArch64/arm64-movi.ll index 8d6caa81d978..3063b500a324 100644 --- a/test/CodeGen/AArch64/arm64-movi.ll +++ b/test/CodeGen/AArch64/arm64-movi.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-eabi | FileCheck %s ;==--------------------------------------------------------------------------== ; Tests for MOV-immediate implemented with ORR-immediate. @@ -7,35 +8,65 @@ ; 64-bit immed with 32-bit pattern size, rotated by 0. define i64 @test64_32_rot0() nounwind { ; CHECK-LABEL: test64_32_rot0: -; CHECK: mov x0, #30064771079 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #30064771079 +; CHECK-NEXT: ret ret i64 30064771079 } ; 64-bit immed with 32-bit pattern size, rotated by 2. define i64 @test64_32_rot2() nounwind { ; CHECK-LABEL: test64_32_rot2: -; CHECK: mov x0, #-4611686002321260541 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-4611686002321260541 +; CHECK-NEXT: ret ret i64 13835058071388291075 } ; 64-bit immed with 4-bit pattern size, rotated by 3. define i64 @test64_4_rot3() nounwind { ; CHECK-LABEL: test64_4_rot3: -; CHECK: mov x0, #-1229782938247303442 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-1229782938247303442 +; CHECK-NEXT: ret ret i64 17216961135462248174 } +; 64-bit immed with 64-bit pattern size, many bits. 
+define i64 @test64_64_manybits() nounwind { +; CHECK-LABEL: test64_64_manybits: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4503599627304960 +; CHECK-NEXT: ret + ret i64 4503599627304960 +} + +; 64-bit immed with 64-bit pattern size, one bit. +; FIXME: Prefer movz, so it prints as "mov". +define i64 @test64_64_onebit() nounwind { +; CHECK-LABEL: test64_64_onebit: +; CHECK: // %bb.0: +; CHECK-NEXT: orr x0, xzr, #0x4000000000 +; CHECK-NEXT: ret + ret i64 274877906944 +} + ; 32-bit immed with 32-bit pattern size, rotated by 16. +; FIXME: Prefer "movz" instead (so we print as "mov"). define i32 @test32_32_rot16() nounwind { ; CHECK-LABEL: test32_32_rot16: -; CHECK: orr w0, wzr, #0xff0000 +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0xff0000 +; CHECK-NEXT: ret ret i32 16711680 } ; 32-bit immed with 2-bit pattern size, rotated by 1. define i32 @test32_2_rot1() nounwind { ; CHECK-LABEL: test32_2_rot1: -; CHECK: mov w0, #-1431655766 +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #-1431655766 +; CHECK-NEXT: ret ret i32 2863311530 } @@ -45,47 +76,93 @@ define i32 @test32_2_rot1() nounwind { define i32 @movz() nounwind { ; CHECK-LABEL: movz: -; CHECK: mov w0, #5 +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #5 +; CHECK-NEXT: ret ret i32 5 } define i64 @movz_3movk() nounwind { ; CHECK-LABEL: movz_3movk: -; CHECK: mov x0, #22136 -; CHECK-NEXT: movk x0, #43981, lsl #16 -; CHECK-NEXT: movk x0, #4660, lsl #32 -; CHECK-NEXT: movk x0, #5, lsl #48 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #22136 +; CHECK-NEXT: movk x0, #43981, lsl #16 +; CHECK-NEXT: movk x0, #4660, lsl #32 +; CHECK-NEXT: movk x0, #5, lsl #48 +; CHECK-NEXT: ret ret i64 1427392313513592 } define i64 @movz_movk_skip1() nounwind { ; CHECK-LABEL: movz_movk_skip1: -; CHECK: mov x0, #1126236160 -; CHECK-NEXT: movk x0, #5, lsl #32 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #1126236160 +; CHECK-NEXT: movk x0, #5, lsl #32 +; CHECK-NEXT: ret ret i64 22601072640 } define i64 @movz_skip1_movk() nounwind { ; CHECK-LABEL: 
movz_skip1_movk: -; CHECK: mov x0, #4660 -; CHECK-NEXT: movk x0, #34388, lsl #32 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4660 +; CHECK-NEXT: movk x0, #34388, lsl #32 +; CHECK-NEXT: ret ret i64 147695335379508 } +; FIXME: Prefer "mov w0, #2863311530; lsl x0, x0, #4" +; or "mov x0, #-6148914691236517206; and x0, x0, #45812984480" +define i64 @orr_lsl_pattern() nounwind { +; CHECK-LABEL: orr_lsl_pattern: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #43680 +; CHECK-NEXT: movk x0, #43690, lsl #16 +; CHECK-NEXT: movk x0, #10, lsl #32 +; CHECK-NEXT: ret + ret i64 45812984480 +} + +; FIXME: prefer "mov x0, #-16639; lsl x0, x0, #24" +define i64 @mvn_lsl_pattern() nounwind { +; CHECK-LABEL: mvn_lsl_pattern: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #16777216 +; CHECK-NEXT: movk x0, #65471, lsl #32 +; CHECK-NEXT: movk x0, #65535, lsl #48 +; CHECK-NEXT: ret + ret i64 -279156097024 +} + +; FIXME: prefer "mov w0, #-63; movk x0, #17, lsl #32" +define i64 @mvn32_pattern_2() nounwind { +; CHECK-LABEL: mvn32_pattern_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #65473 +; CHECK-NEXT: movk x0, #65535, lsl #16 +; CHECK-NEXT: movk x0, #17, lsl #32 +; CHECK-NEXT: ret + ret i64 77309411265 +} + ;==--------------------------------------------------------------------------== ; Tests for MOVN with MOVK. 
;==--------------------------------------------------------------------------== define i64 @movn() nounwind { ; CHECK-LABEL: movn: -; CHECK: mov x0, #-42 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-42 +; CHECK-NEXT: ret ret i64 -42 } define i64 @movn_skip1_movk() nounwind { ; CHECK-LABEL: movn_skip1_movk: -; CHECK: mov x0, #-60876 -; CHECK-NEXT: movk x0, #65494, lsl #32 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-60876 +; CHECK-NEXT: movk x0, #65494, lsl #32 +; CHECK-NEXT: ret ret i64 -176093720012 } @@ -96,107 +173,283 @@ define i64 @movn_skip1_movk() nounwind { define i64 @orr_movk1() nounwind { ; CHECK-LABEL: orr_movk1: -; CHECK: mov x0, #72056494543077120 -; CHECK: movk x0, #57005, lsl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #72056494543077120 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: ret ret i64 72056498262245120 } define i64 @orr_movk2() nounwind { ; CHECK-LABEL: orr_movk2: -; CHECK: mov x0, #72056494543077120 -; CHECK: movk x0, #57005, lsl #48 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #72056494543077120 +; CHECK-NEXT: movk x0, #57005, lsl #48 +; CHECK-NEXT: ret ret i64 -2400982650836746496 } define i64 @orr_movk3() nounwind { ; CHECK-LABEL: orr_movk3: -; CHECK: mov x0, #72056494543077120 -; CHECK: movk x0, #57005, lsl #32 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #72056494543077120 +; CHECK-NEXT: movk x0, #57005, lsl #32 +; CHECK-NEXT: ret ret i64 72020953688702720 } define i64 @orr_movk4() nounwind { ; CHECK-LABEL: orr_movk4: -; CHECK: mov x0, #72056494543077120 -; CHECK: movk x0, #57005 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #72056494543077120 +; CHECK-NEXT: movk x0, #57005 +; CHECK-NEXT: ret ret i64 72056494543068845 } ; rdar://14987618 define i64 @orr_movk5() nounwind { ; CHECK-LABEL: orr_movk5: -; CHECK: mov x0, #-71777214294589696 -; CHECK: movk x0, #57005, lsl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-71777214294589696 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: ret ret i64 -71777214836900096 } define i64 
@orr_movk6() nounwind { ; CHECK-LABEL: orr_movk6: -; CHECK: mov x0, #-71777214294589696 -; CHECK: movk x0, #57005, lsl #16 -; CHECK: movk x0, #57005, lsl #48 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-71777214294589696 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: movk x0, #57005, lsl #48 +; CHECK-NEXT: ret ret i64 -2400982647117578496 } define i64 @orr_movk7() nounwind { ; CHECK-LABEL: orr_movk7: -; CHECK: mov x0, #-71777214294589696 -; CHECK: movk x0, #57005, lsl #48 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-71777214294589696 +; CHECK-NEXT: movk x0, #57005, lsl #48 +; CHECK-NEXT: ret ret i64 -2400982646575268096 } define i64 @orr_movk8() nounwind { ; CHECK-LABEL: orr_movk8: -; CHECK: mov x0, #-71777214294589696 -; CHECK: movk x0, #57005 -; CHECK: movk x0, #57005, lsl #48 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-71777214294589696 +; CHECK-NEXT: movk x0, #57005 +; CHECK-NEXT: movk x0, #57005, lsl #48 +; CHECK-NEXT: ret ret i64 -2400982646575276371 } ; rdar://14987715 define i64 @orr_movk9() nounwind { ; CHECK-LABEL: orr_movk9: -; CHECK: mov x0, #1152921435887370240 -; CHECK: movk x0, #65280 -; CHECK: movk x0, #57005, lsl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #1152921435887370240 +; CHECK-NEXT: movk x0, #65280 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: ret ret i64 1152921439623315200 } define i64 @orr_movk10() nounwind { ; CHECK-LABEL: orr_movk10: -; CHECK: mov x0, #1152921504606846720 -; CHECK: movk x0, #57005, lsl #16 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #1152921504606846720 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: ret ret i64 1152921504047824640 } define i64 @orr_movk11() nounwind { ; CHECK-LABEL: orr_movk11: -; CHECK: mov x0, #-4503599627370241 -; CHECK: movk x0, #57005, lsl #16 -; CHECK: movk x0, #65535, lsl #32 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-65281 +; CHECK-NEXT: movk x0, #57005, lsl #16 +; CHECK-NEXT: movk x0, #65520, lsl #48 +; CHECK-NEXT: ret ret i64 -4222125209747201 } define i64 
@orr_movk12() nounwind { ; CHECK-LABEL: orr_movk12: -; CHECK: mov x0, #-4503599627370241 -; CHECK: movk x0, #57005, lsl #32 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-4503599627370241 +; CHECK-NEXT: movk x0, #57005, lsl #32 +; CHECK-NEXT: ret ret i64 -4258765016661761 } define i64 @orr_movk13() nounwind { ; CHECK-LABEL: orr_movk13: -; CHECK: mov x0, #17592169267200 -; CHECK: movk x0, #57005 -; CHECK: movk x0, #57005, lsl #48 +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #17592169267200 +; CHECK-NEXT: movk x0, #57005 +; CHECK-NEXT: movk x0, #57005, lsl #48 +; CHECK-NEXT: ret ret i64 -2401245434149282131 } ; rdar://13944082 define i64 @g() nounwind { ; CHECK-LABEL: g: -; CHECK: mov x0, #2 -; CHECK: movk x0, #65535, lsl #48 +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x0, #2 +; CHECK-NEXT: movk x0, #65535, lsl #48 +; CHECK-NEXT: ret entry: ret i64 -281474976710654 } + +define i64 @orr_movk14() nounwind { +; CHECK-LABEL: orr_movk14: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-549755813888 +; CHECK-NEXT: movk x0, #2048, lsl #16 +; CHECK-NEXT: ret + ret i64 -549621596160 +} + +define i64 @orr_movk15() nounwind { +; CHECK-LABEL: orr_movk15: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #549755813887 +; CHECK-NEXT: movk x0, #63487, lsl #16 +; CHECK-NEXT: ret + ret i64 549621596159 +} + +; FIXME: prefer "mov x0, #2147483646; orr x0, x0, #36028659580010496" +define i64 @orr_movk16() nounwind { +; CHECK-LABEL: orr_movk16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #36028659580010496 +; CHECK-NEXT: movk x0, #65534 +; CHECK-NEXT: movk x0, #32767, lsl #16 +; CHECK-NEXT: ret + ret i64 36028661727494142 +} + +define i64 @orr_movk17() nounwind { +; CHECK-LABEL: orr_movk17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-1099511627776 +; CHECK-NEXT: movk x0, #65280, lsl #16 +; CHECK-NEXT: ret + ret i64 -1095233437696 +} + +define i64 @orr_movk18() nounwind { +; CHECK-LABEL: orr_movk18: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #137438887936 +; CHECK-NEXT: movk x0, #65473 +; 
CHECK-NEXT: ret + ret i64 137438953409 +} + +; FIXME: prefer "mov x0, #72340172838076673; and x0, x0, #2199023255296" +define i64 @orr_and() nounwind { +; CHECK-LABEL: orr_and: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #256 +; CHECK-NEXT: movk x0, #257, lsl #16 +; CHECK-NEXT: movk x0, #257, lsl #32 +; CHECK-NEXT: ret + ret i64 1103823438080 +} + +; FIXME: prefer "mov w0, #-1431655766; movk x0, #9, lsl #32" +define i64 @movn_movk() nounwind { +; CHECK-LABEL: movn_movk: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #43690 +; CHECK-NEXT: movk x0, #43690, lsl #16 +; CHECK-NEXT: movk x0, #9, lsl #32 +; CHECK-NEXT: ret + ret i64 41518017194 +} + +; FIXME: prefer "mov w0, #-13690; orr x0, x0, #0x1111111111111111" +define i64 @movn_orr() nounwind { +; CHECK-LABEL: movn_orr: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-51847 +; CHECK-NEXT: movk x0, #4369, lsl #32 +; CHECK-NEXT: movk x0, #4369, lsl #48 +; CHECK-NEXT: ret + ret i64 1229782942255887737 +} + +; FIXME: prefer "mov w0, #-305397761; eor x0, x0, #0x3333333333333333" +define i64 @movn_eor() nounwind { +; CHECK-LABEL: movn_eor: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #3689348814741910323 +; CHECK-NEXT: movk x0, #52428 +; CHECK-NEXT: movk x0, #8455, lsl #16 +; CHECK-NEXT: ret + ret i64 3689348814437076172 +} + +; FIXME: prefer "mov x0, #536866816; orr x0, x0, #0x3fff800000000000" +define i64 @orr_orr_64() nounwind { +; CHECK-LABEL: orr_orr_64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4611545280939032576 +; CHECK-NEXT: movk x0, #61440 +; CHECK-NEXT: movk x0, #8191, lsl #16 +; CHECK-NEXT: ret + ret i64 4611545281475899392 +} + +; FIXME: prefer "mov x0, #558551907040256; orr x0, x0, #0x1000100010001000" +define i64 @orr_orr_32() nounwind { +; CHECK-LABEL: orr_orr_32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-287953294993589248 +; CHECK-NEXT: movk x0, #7169, lsl #16 +; CHECK-NEXT: movk x0, #7169, lsl #48 +; CHECK-NEXT: ret + ret i64 2018171185438784512 +} + +; FIXME: prefer "mov x0, #281479271743489; orr x0, x0, 
#0x1000100010001000" +define i64 @orr_orr_16() nounwind { +; CHECK-LABEL: orr_orr_16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4097 +; CHECK-NEXT: movk x0, #4097, lsl #16 +; CHECK-NEXT: movk x0, #4097, lsl #32 +; CHECK-NEXT: movk x0, #4097, lsl #48 +; CHECK-NEXT: ret + ret i64 1153220576333074433 +} + +; FIXME: prefer "mov x0, #144680345676153346; orr x0, x0, #0x1818181818181818" +define i64 @orr_orr_8() nounwind { +; CHECK-LABEL: orr_orr_8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #6682 +; CHECK-NEXT: movk x0, #6682, lsl #16 +; CHECK-NEXT: movk x0, #6682, lsl #32 +; CHECK-NEXT: movk x0, #6682, lsl #48 +; CHECK-NEXT: ret + ret i64 1880844493789993498 +} + +; FIXME: prefer "mov x0, #-6148914691236517206; orr x0, x0, #0x0FFFFF0000000000" +define i64 @orr_64_orr_8() nounwind { +; CHECK-LABEL: orr_64_orr_8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-6148914691236517206 +; CHECK-NEXT: movk x0, #65450, lsl #32 +; CHECK-NEXT: movk x0, #45055, lsl #48 +; CHECK-NEXT: ret + ret i64 -5764607889538110806 +} diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll index b3a2bcd5d669..c2412371d14f 100644 --- a/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,GENERIC ; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check. 
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck %s --check-prefixes=CHECK,EXYNOSM1 +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m3 | FileCheck %s --check-prefixes=CHECK,EXYNOSM3 declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) @@ -353,6 +354,7 @@ define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x floa ; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -366,6 +368,7 @@ define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x flo ; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -379,6 +382,7 @@ define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x flo ; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ 
-390,6 +394,7 @@ define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x fl ; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -401,6 +406,7 @@ define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x floa ; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] entry: %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> @@ -413,6 +419,7 @@ define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x flo ; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] entry: %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -425,6 +432,7 @@ define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x flo ; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 
float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3> @@ -437,6 +445,7 @@ define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x fl ; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> @@ -449,6 +458,7 @@ define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x ; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -462,6 +472,7 @@ define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x ; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -473,6 +484,7 @@ define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x ; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, 
[[V]].2d +; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %sub = fsub <1 x double> <double -0.000000e+00>, %v %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer @@ -485,6 +497,7 @@ define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x ; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] entry: %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1> @@ -1311,6 +1324,7 @@ define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> %mul = fmul <2 x float> %shuffle, %a @@ -1334,6 +1348,7 @@ define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %mul = fmul <4 x float> %shuffle, %a @@ -1345,6 +1360,7 @@ define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { ; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; EXYNOSM3: fmul {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -1356,6 +1372,7 @@ define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> %mul = fmul <2 x float> %shuffle, %a @@ -1379,6 +1396,7 @@ define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %mul = fmul <4 x float> %shuffle, %a @@ -1390,6 +1408,7 @@ define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> %mul = fmul <2 x double> %shuffle, %a @@ -1401,6 +1420,7 @@ define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] ; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> 
<i32 1, i32 1> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1412,6 +1432,7 @@ define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1423,6 +1444,7 @@ define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { ; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1434,6 +1456,7 @@ define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] ; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1445,6 +1468,7 @@ define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: mulx {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1456,6 +1480,7 @@ define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] ; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1771,6 +1796,7 @@ define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x fl ; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1782,6 +1808,7 @@ define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x f ; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1793,6 +1820,7 @@ define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x 
f ; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1804,6 +1832,7 @@ define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x ; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1815,6 +1844,7 @@ define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x fl ; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer @@ -1827,6 +1857,7 @@ define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x f ; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer @@ -1839,6 +1870,7 @@ define <2 
x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x f ; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer @@ -1851,6 +1883,7 @@ define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x ; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer @@ -1863,6 +1896,7 @@ define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 ; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -1874,6 +1908,7 @@ define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 ; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %sub = fsub <2 x double> <double -0.000000e+00, 
double -0.000000e+00>, %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer @@ -2606,6 +2641,7 @@ define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2617,6 +2653,7 @@ define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2628,6 +2665,7 @@ define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2651,6 +2689,7 @@ define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2662,6 
+2701,7 @@ define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -2673,6 +2713,7 @@ define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2684,6 +2725,7 @@ define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2695,6 +2737,7 @@ define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { ; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 
x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -2706,6 +2749,7 @@ define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] ; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2717,6 +2761,7 @@ define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] ; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2728,6 +2773,7 @@ define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] ; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d +; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -2741,6 +2787,8 @@ define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, ; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] ; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s ; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM3: fmla 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] entry: %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) @@ -2758,6 +2806,8 @@ define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> ; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s ; EXYNOSM1: dup [[W:v[0-9]+]].4s, {{v[0-9]+}}.s[1] ; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[W]].4s +; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] entry: %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) @@ -2785,3 +2835,12 @@ entry: %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) ret <2 x float> %0 } + +define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" { +; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3: +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} diff --git a/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll b/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll index cba81ef99b94..7cc5a43d53ce 100644 --- a/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll @@ -858,114 +858,116 @@ define <2 x i64> @cmneqz2xi64(<2 x i64> %A) { } define <8 x i8> @cmhsz8xi8(<8 x i8> %A) { -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi 
v[[ZERO:[0-9]+]].8b, #2 ;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b - %tmp3 = icmp uge <8 x i8> %A, zeroinitializer; + %tmp3 = icmp uge <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 } define <16 x i8> @cmhsz16xi8(<16 x i8> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].16b, #2 ;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b - %tmp3 = icmp uge <16 x i8> %A, zeroinitializer; + %tmp3 = icmp uge <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 } define <4 x i16> @cmhsz4xi16(<4 x i16> %A) { -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].4h, #2 ;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h - %tmp3 = icmp uge <4 x i16> %A, zeroinitializer; + %tmp3 = icmp uge <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 } define <8 x i16> @cmhsz8xi16(<8 x i16> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].8h, #2 ;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h - %tmp3 = icmp uge <8 x i16> %A, zeroinitializer; + %tmp3 = icmp uge <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } define <2 x i32> @cmhsz2xi32(<2 x i32> %A) { -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].2s, #2 ;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s - %tmp3 = icmp uge <2 x i32> %A, zeroinitializer; + %tmp3 = icmp uge <2 x i32> %A, <i32 2, i32 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } define <4 x i32> @cmhsz4xi32(<4 x i32> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].4s, #2 ;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s - %tmp3 = icmp uge 
<4 x i32> %A, zeroinitializer; + %tmp3 = icmp uge <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @cmhsz2xi64(<2 x i64> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: orr w[[TWO:[0-9]+]], wzr, #0x2 +;CHECK-NEXT: dup v[[ZERO:[0-9]+]].2d, x[[TWO]] ;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d - %tmp3 = icmp uge <2 x i64> %A, zeroinitializer; + %tmp3 = icmp uge <2 x i64> %A, <i64 2, i64 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } define <8 x i8> @cmhiz8xi8(<8 x i8> %A) { -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].8b, #1 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b - %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer; + %tmp3 = icmp ugt <8 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 } define <16 x i8> @cmhiz16xi8(<16 x i8> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].16b, #1 ;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b - %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer; + %tmp3 = icmp ugt <16 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 } define <4 x i16> @cmhiz4xi16(<4 x i16> %A) { -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].4h, #1 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h - %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer; + %tmp3 = icmp ugt <4 x i16> %A, <i16 1, i16 1, i16 1, i16 1> %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 } define <8 x i16> @cmhiz8xi16(<8 x i16> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].8h, #1 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h - %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer; + %tmp3 = icmp ugt <8 x i16> %A, <i16 1, i16 1, i16 1, 
i16 1, i16 1, i16 1, i16 1, i16 1> %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } define <2 x i32> @cmhiz2xi32(<2 x i32> %A) { -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].2s, #1 ;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s - %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer; + %tmp3 = icmp ugt <2 x i32> %A, <i32 1, i32 1> %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } define <4 x i32> @cmhiz4xi32(<4 x i32> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].4s, #1 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s - %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer; + %tmp3 = icmp ugt <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1> %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @cmhiz2xi64(<2 x i64> %A) { -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: orr w[[ONE:[0-9]+]], wzr, #0x1 +;CHECK-NEXT: dup v[[ZERO:[0-9]+]].2d, x[[ONE]] ;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d - %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer; + %tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1> %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -1043,9 +1045,9 @@ define <2 x i64> @cmlsz2xi64(<2 x i64> %A) { define <8 x i8> @cmloz8xi8(<8 x i8> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].8b, #2 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v[[ZERO]].8b, {{v[0-9]+}}.8b - %tmp3 = icmp ult <8 x i8> %A, zeroinitializer; + %tmp3 = icmp ult <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 } @@ -1053,9 +1055,9 @@ define <8 x i8> @cmloz8xi8(<8 x i8> %A) { define <16 x i8> @cmloz16xi8(<16 x i8> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. 
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].16b, #2 ;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b - %tmp3 = icmp ult <16 x i8> %A, zeroinitializer; + %tmp3 = icmp ult <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 } @@ -1063,9 +1065,9 @@ define <16 x i8> @cmloz16xi8(<16 x i8> %A) { define <4 x i16> @cmloz4xi16(<4 x i16> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].4h, #2 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h - %tmp3 = icmp ult <4 x i16> %A, zeroinitializer; + %tmp3 = icmp ult <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 } @@ -1073,9 +1075,9 @@ define <4 x i16> @cmloz4xi16(<4 x i16> %A) { define <8 x i16> @cmloz8xi16(<8 x i16> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].8h, #2 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h - %tmp3 = icmp ult <8 x i16> %A, zeroinitializer; + %tmp3 = icmp ult <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -1083,9 +1085,9 @@ define <8 x i16> @cmloz8xi16(<8 x i16> %A) { define <2 x i32> @cmloz2xi32(<2 x i32> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. 
-;CHECK: movi d[[ZERO:[0-9]+]], #0 +;CHECK: movi v[[ZERO:[0-9]+]].2s, #2 ;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s - %tmp3 = icmp ult <2 x i32> %A, zeroinitializer; + %tmp3 = icmp ult <2 x i32> %A, <i32 2, i32 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } @@ -1093,9 +1095,9 @@ define <2 x i32> @cmloz2xi32(<2 x i32> %A) { define <4 x i32> @cmloz4xi32(<4 x i32> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: movi v[[ZERO:[0-9]+]].4s, #2 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s - %tmp3 = icmp ult <4 x i32> %A, zeroinitializer; + %tmp3 = icmp ult <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -1103,9 +1105,10 @@ define <4 x i32> @cmloz4xi32(<4 x i32> %A) { define <2 x i64> @cmloz2xi64(<2 x i64> %A) { ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. 
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0 +;CHECK: orr w[[TWO:[0-9]+]], wzr, #0x2 +;CHECK-NEXT: dup v[[ZERO:[0-9]+]].2d, x[[TWO]] ;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d - %tmp3 = icmp ult <2 x i64> %A, zeroinitializer; + %tmp3 = icmp ult <2 x i64> %A, <i64 2, i64 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index 84df9d6ac0cc..2a9e545165e9 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -925,7 +925,7 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) { ; CHECK-LABEL: test_extracts_inserts_varidx_insert: ; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3 ; CHECK: bfi x9, [[MASKED_IDX]], #1, #2 -; CHECK: st1 { v0.h }[0], [x9] +; CHECK: str h0, [x9] ; CHECK-DAG: ldr d[[R:[0-9]+]] ; CHECK-DAG: mov v[[R]].h[1], v0.h[1] ; CHECK-DAG: mov v[[R]].h[2], v0.h[2] diff --git a/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll b/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll new file mode 100644 index 000000000000..bb67874b0165 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll @@ -0,0 +1,79 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <16 x i8> @div16xi8(<16 x i8> %x) { +; CHECK-LABEL: div16xi8: +; CHECK: movi [[DIVISOR:(v[0-9]+)]].16b, #41 +; CHECK-NEXT: smull2 [[SMULL2:(v[0-9]+)]].8h, v0.16b, [[DIVISOR]].16b +; CHECK-NEXT: smull [[SMULL:(v[0-9]+)]].8h, v0.8b, [[DIVISOR]].8b +; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).16b]], [[SMULL]].16b, [[SMULL2]].16b +; CHECK-NEXT: sshr [[SSHR:(v[0-9]+.16b)]], [[UZP2]], #2 +; CHECK-NEXT: usra v0.16b, [[SSHR]], #7 + %div = sdiv <16 x i8> %x, <i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25> + ret <16 x i8> %div +} + +define <8 x i16> @div8xi16(<8 x i16> %x) { +; CHECK-LABEL: div8xi16: +; CHECK: mov 
[[TMP:(w[0-9]+)]], #40815 +; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].8h, [[TMP]] +; CHECK-NEXT: smull2 [[SMULL2:(v[0-9]+)]].4s, v0.8h, [[DIVISOR]].8h +; CHECK-NEXT: smull [[SMULL:(v[0-9]+)]].4s, v0.4h, [[DIVISOR]].4h +; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).8h]], [[SMULL]].8h, [[SMULL2]].8h +; CHECK-NEXT: add [[ADD:(v[0-9]+).8h]], [[UZP2]], v0.8h +; CHECK-NEXT: sshr [[SSHR:(v[0-9]+).8h]], [[ADD]], #12 +; CHECK-NEXT: usra v0.8h, [[SSHR]], #15 + %div = sdiv <8 x i16> %x, <i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577> + ret <8 x i16> %div +} + +define <4 x i32> @div32xi4(<4 x i32> %x) { +; CHECK-LABEL: div32xi4: +; CHECK: mov [[TMP:(w[0-9]+)]], #7527 +; CHECK-NEXT: movk [[TMP]], #28805, lsl #16 +; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].4s, [[TMP]] +; CHECK-NEXT: smull2 [[SMULL2:(v[0-9]+)]].2d, v0.4s, [[DIVISOR]].4s +; CHECK-NEXT: smull [[SMULL:(v[0-9]+)]].2d, v0.2s, [[DIVISOR]].2s +; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).4s]], [[SMULL]].4s, [[SMULL2]].4s +; CHECK-NEXT: sshr [[SSHR:(v[0-9]+.4s)]], [[UZP2]], #22 +; CHECK-NEXT: usra v0.4s, [[UZP2]], #31 + %div = sdiv <4 x i32> %x, <i32 9542677, i32 9542677, i32 9542677, i32 9542677> + ret <4 x i32> %div +} + +define <16 x i8> @udiv16xi8(<16 x i8> %x) { +; CHECK-LABEL: udiv16xi8: +; CHECK: movi [[DIVISOR:(v[0-9]+)]].16b, #121 +; CHECK-NEXT: umull2 [[UMULL2:(v[0-9]+)]].8h, v0.16b, [[DIVISOR]].16b +; CHECK-NEXT: umull [[UMULL:(v[0-9]+)]].8h, v0.8b, [[DIVISOR]].8b +; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).16b]], [[UMULL]].16b, [[UMULL2]].16b +; CHECK-NEXT: ushr v0.16b, [[UZP2]], #5 + %div = udiv <16 x i8> %x, <i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68> + ret <16 x i8> %div +} + +define <8 x i16> @udiv8xi16(<8 x i16> %x) { +; CHECK-LABEL: udiv8xi16: +; CHECK: mov [[TMP:(w[0-9]+)]], #16593 +; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].8h, [[TMP]] +; CHECK-NEXT: umull2 [[UMULL2:(v[0-9]+)]].4s, v0.8h, [[DIVISOR]].8h +; CHECK-NEXT: umull 
[[UMULL:(v[0-9]+)]].4s, v0.4h, [[DIVISOR]].4h +; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).8h]], [[UMULL]].8h, [[UMULL2]].8h +; CHECK-NEXT: sub [[SUB:(v[0-9]+).8h]], v0.8h, [[UZP2]] +; CHECK-NEXT: usra [[USRA:(v[0-9]+).8h]], [[SUB]], #1 +; CHECK-NEXT: ushr v0.8h, [[USRA]], #12 + %div = udiv <8 x i16> %x, <i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537> + ret <8 x i16> %div +} + +define <4 x i32> @udiv32xi4(<4 x i32> %x) { +; CHECK-LABEL: udiv32xi4: +; CHECK: mov [[TMP:(w[0-9]+)]], #16747 +; CHECK-NEXT: movk [[TMP]], #31439, lsl #16 +; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].4s, [[TMP]] +; CHECK-NEXT: umull2 [[UMULL2:(v[0-9]+)]].2d, v0.4s, [[DIVISOR]].4s +; CHECK-NEXT: umull [[UMULL:(v[0-9]+)]].2d, v0.2s, [[DIVISOR]].2s +; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).4s]], [[UMULL]].4s, [[UMULL2]].4s +; CHECK-NEXT: ushr v0.4s, [[UZP2]], #22 + %div = udiv <4 x i32> %x, <i32 8743143, i32 8743143, i32 8743143, i32 8743143> + ret <4 x i32> %div +} diff --git a/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll index b63200efb6b5..3c8dae23718f 100644 --- a/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll +++ b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll @@ -391,6 +391,15 @@ entry: ret void } +define void @test_vst1q_lane0_s16(i16* %a, <8 x i16> %b) { +; CHECK-LABEL: test_vst1q_lane0_s16: +; CHECK: str {{h[0-9]+}}, [x0] +entry: + %0 = extractelement <8 x i16> %b, i32 0 + store i16 %0, i16* %a, align 2 + ret void +} + define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) { ; CHECK-LABEL: test_vst1q_lane_s32: ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] @@ -400,6 +409,15 @@ entry: ret void } +define void @test_vst1q_lane0_s32(i32* %a, <4 x i32> %b) { +; CHECK-LABEL: test_vst1q_lane0_s32: +; CHECK: str {{s[0-9]+}}, [x0] +entry: + %0 = extractelement <4 x i32> %b, i32 0 + store i32 %0, i32* %a, align 4 + ret void +} + define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) { ; CHECK-LABEL: 
test_vst1q_lane_s64: ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] @@ -409,6 +427,15 @@ entry: ret void } +define void @test_vst1q_lane0_s64(i64* %a, <2 x i64> %b) { +; CHECK-LABEL: test_vst1q_lane0_s64: +; CHECK: str {{d[0-9]+}}, [x0] +entry: + %0 = extractelement <2 x i64> %b, i32 0 + store i64 %0, i64* %a, align 8 + ret void +} + define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) { ; CHECK-LABEL: test_vst1q_lane_f32: ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] @@ -418,6 +445,15 @@ entry: ret void } +define void @test_vst1q_lane0_f32(float* %a, <4 x float> %b) { +; CHECK-LABEL: test_vst1q_lane0_f32: +; CHECK: str {{s[0-9]+}}, [x0] +entry: + %0 = extractelement <4 x float> %b, i32 0 + store float %0, float* %a, align 4 + ret void +} + define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) { ; CHECK-LABEL: test_vst1q_lane_f64: ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] @@ -427,6 +463,15 @@ entry: ret void } +define void @test_vst1q_lane0_f64(double* %a, <2 x double> %b) { +; CHECK-LABEL: test_vst1q_lane0_f64: +; CHECK: str {{d[0-9]+}}, [x0] +entry: + %0 = extractelement <2 x double> %b, i32 0 + store double %0, double* %a, align 8 + ret void +} + define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) { ; CHECK-LABEL: test_vst1_lane_s8: ; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] @@ -445,6 +490,15 @@ entry: ret void } +define void @test_vst1_lane0_s16(i16* %a, <4 x i16> %b) { +; CHECK-LABEL: test_vst1_lane0_s16: +; CHECK: str {{h[0-9]+}}, [x0] +entry: + %0 = extractelement <4 x i16> %b, i32 0 + store i16 %0, i16* %a, align 2 + ret void +} + define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) { ; CHECK-LABEL: test_vst1_lane_s32: ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] @@ -454,9 +508,18 @@ entry: ret void } +define void @test_vst1_lane0_s32(i32* %a, <2 x i32> %b) { +; CHECK-LABEL: test_vst1_lane0_s32: +; CHECK: str {{s[0-9]+}}, [x0] +entry: + %0 = extractelement <2 x i32> %b, i32 0 + store i32 %0, i32* %a, align 4 + ret void 
+} + define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) { ; CHECK-LABEL: test_vst1_lane_s64: -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +; CHECK: str {{d[0-9]+}}, [x0] entry: %0 = extractelement <1 x i64> %b, i32 0 store i64 %0, i64* %a, align 8 @@ -472,6 +535,15 @@ entry: ret void } +define void @test_vst1_lane0_f32(float* %a, <2 x float> %b) { +; CHECK-LABEL: test_vst1_lane0_f32: +; CHECK: str {{s[0-9]+}}, [x0] +entry: + %0 = extractelement <2 x float> %b, i32 0 + store float %0, float* %a, align 4 + ret void +} + define void @test_vst1_lane_f64(double* %a, <1 x double> %b) { ; CHECK-LABEL: test_vst1_lane_f64: ; CHECK: str {{d[0-9]+}}, [x0] diff --git a/test/CodeGen/AArch64/arm64-nvcast.ll b/test/CodeGen/AArch64/arm64-nvcast.ll index ba2512718c4e..d9486127bf11 100644 --- a/test/CodeGen/AArch64/arm64-nvcast.ll +++ b/test/CodeGen/AArch64/arm64-nvcast.ll @@ -31,3 +31,19 @@ entry: store float %v2, float* %p1, align 4 ret void } + + +%"st1" = type { %"subst1", %"subst1", %"subst1" } +%"subst1" = type { %float4 } +%float4 = type { float, float, float, float } + +@_gv = external unnamed_addr global %"st1", align 8 + +define internal void @nvcast_f32_v8i8() { +; CHECK-LABEL: _nvcast_f32_v8i8 +; CHECK: movi.8b v[[REG:[0-9]+]], #254 +; CHECK: str d[[REG]] +entry: + store <2 x float> <float 0xC7DFDFDFC0000000, float 0xC7DFDFDFC0000000>, <2 x float>* bitcast (%"st1"* @_gv to <2 x float>*), align 8 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll index f61f98a4d511..7efb4bf6d596 100644 --- a/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -36,9 +36,7 @@ ; HOTNESS-NOT: Executing Pass ; HOTNESS: block-frequency: empty_func ; HOTNESS-NOT: Executing Pass -; HOTNESS: Executing Pass 'MachineDominator Tree Construction' -; HOTNESS-NEXT: Executing Pass 'Machine Natural Loop Construction' -; HOTNESS-NEXT: Executing Pass 'AArch64 
Assembly Printer' +; HOTNESS: Executing Pass 'AArch64 Assembly Printer' ; HOTNESS: arm64-summary-remarks.ll:5:0: 1 instructions in function (hotness: 33) @@ -47,8 +45,6 @@ ; NO_HOTNESS-NEXT: Freeing Pass 'Implement the 'patchable-function' attribute' ; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis' ; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter' -; NO_HOTNESS-NEXT: Executing Pass 'MachineDominator Tree Construction' -; NO_HOTNESS-NEXT: Executing Pass 'Machine Natural Loop Construction' ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer' ; NO_HOTNESS: arm64-summary-remarks.ll:5:0: 1 instructions in function{{$}} diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll index ccd12cdf6744..a7d92153f515 100644 --- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll +++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone < %s | FileCheck %s -; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel < %s | FileCheck %s --check-prefix=FAST -; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -filetype=obj -o %t %s +; RUN: llc -fast-isel-sink-local-values -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -fast-isel-sink-local-values -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel < %s | FileCheck %s --check-prefix=FAST +; RUN: llc -fast-isel-sink-local-values -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -filetype=obj -o %t %s ; RUN: llvm-objdump -triple arm64-apple-darwin -d %t | FileCheck %s --check-prefix CHECK-ENCODING ; CHECK-ENCODING-NOT: <unknown> @@ -51,10 +51,10 @@ entry: ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen2: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 -; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 -; FAST-NEXT: 
orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: str [[REG2]], [sp, #16] +; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST: Ltmp ; FAST-NEXT: mov x16, #281470681743360 @@ -87,14 +87,14 @@ entry: ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen3: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 -; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 -; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 -; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 -; FAST-NEXT: mov [[REG5:x[0-9]+]], #10 ; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: str [[REG2]], [sp, #16] +; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: str [[REG3]], [sp, #24] +; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 ; FAST-NEXT: str [[REG4]], [sp, #36] +; FAST-NEXT: mov [[REG5:x[0-9]+]], #10 ; FAST-NEXT: str [[REG5]], [sp, #48] ; FAST: Ltmp ; FAST-NEXT: mov x16, #281470681743360 diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll index 9b5d8a890fa6..145066e697f6 100644 --- a/test/CodeGen/AArch64/arm64-platform-reg.ll +++ b/test/CodeGen/AArch64/arm64-platform-reg.ll @@ -1,7 +1,10 @@ -; RUN: llc -mtriple=arm64-apple-ios -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 -; RUN: llc -mtriple=arm64-freebsd-gnu -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-apple-ios -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-freebsd-gnu -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=aarch64-fuchsia -mattr=+reserve-x20 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE --check-prefix=CHECK-RESERVE-X20 +; RUN: llc -mtriple=aarch64-fuchsia -mattr=+reserve-x18,+reserve-x20 -o - %s | 
FileCheck %s --check-prefix=CHECK-RESERVE --check-prefix=CHECK-RESERVE-X18 --check-prefix=CHECK-RESERVE-X20 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64-windows -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=aarch64-fuchsia -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=aarch64-windows -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE --check-prefix=CHECK-RESERVE-X18 ; x18 is reserved as a platform register on Darwin but not on other ; systems. Create loads of register pressure and make sure this is respected. @@ -18,11 +21,13 @@ define void @keep_live() { ; CHECK: ldr x18 ; CHECK: str x18 -; CHECK-RESERVE-X18-NOT: ldr fp +; CHECK-RESERVE-NOT: ldr fp ; CHECK-RESERVE-X18-NOT: ldr x18 -; CHECK-RESERVE-X18: Spill -; CHECK-RESERVE-X18-NOT: ldr fp +; CHECK-RESERVE-X20-NOT: ldr x20 +; CHECK-RESERVE: Spill +; CHECK-RESERVE-NOT: ldr fp ; CHECK-RESERVE-X18-NOT: ldr x18 -; CHECK-RESERVE-X18: ret +; CHECK-RESERVE-X20-NOT: ldr x20 +; CHECK-RESERVE: ret ret void } diff --git a/test/CodeGen/AArch64/arm64-register-offset-addressing.ll b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll index 7078ffca5218..61ffad574efe 100644 --- a/test/CodeGen/AArch64/arm64-register-offset-addressing.ll +++ b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll @@ -2,8 +2,7 @@ define i8 @test_64bit_add(i16* %a, i64 %b) { ; CHECK-LABEL: test_64bit_add: -; CHECK: lsl [[REG:x[0-9]+]], x1, #1 -; CHECK: ldrb w0, [x0, [[REG]]] +; CHECK: ldrh w0, [x0, x1, lsl #1] ; CHECK: ret %tmp1 = getelementptr inbounds i16, i16* %a, i64 %b %tmp2 = load i16, i16* %tmp1 diff --git a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir index 43d20394be45..b4590756d044 100644 --- a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir +++ b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir @@ -1,6 +1,6 @@ # RUN: llc -mtriple=aarch64-linux-gnu 
-run-pass peephole-opt -o - %s | FileCheck %s # CHECK: %1:gpr32common = ANDWri {{.*}} -# CHECK-NEXT: %wzr = SUBSWri {{.*}} +# CHECK-NEXT: $wzr = SUBSWri {{.*}} --- | define i32 @test01() nounwind { entry: @@ -27,15 +27,15 @@ body: | %0 = MOVi32imm 1 %1 = ANDWri killed %1, 15 - %wzr = SUBSWri killed %1, 0, 0, implicit-def %nzcv - Bcc 9, %bb.2.if.end, implicit %nzcv + $wzr = SUBSWri killed %1, 0, 0, implicit-def $nzcv + Bcc 9, %bb.2.if.end, implicit $nzcv bb.1.if.then: - %w0 = MOVi32imm 1 - RET_ReallyLR implicit %w0 + $w0 = MOVi32imm 1 + RET_ReallyLR implicit $w0 bb.2.if.end: - %w0 = MOVi32imm 0 - RET_ReallyLR implicit %w0 + $w0 = MOVi32imm 0 + RET_ReallyLR implicit $w0 ... diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 0253229c0d7f..7ae739ebf4d5 100644 --- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -281,7 +281,7 @@ declare void @somethingElse(...) ; Shift second argument by one and store into returned register. ; ENABLE: lsl w0, w1, #1 ; ENABLE: ret -define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { +define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -355,7 +355,7 @@ entry: ; CHECK-NEXT: lsl w0, w1, #1 ; DISABLE-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret -define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 { +define i32 @variadicFunc(i32 %cond, i32 %count, ...) 
nounwind { entry: %ap = alloca i8*, align 8 %tobool = icmp eq i32 %cond, 0 diff --git a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll index e72c2b7989d2..98851917999b 100644 --- a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll +++ b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -fast-isel -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp { ; CHECK: uaddlv.16b h0, v0 diff --git a/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll b/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll index fe22296320fc..6a9998835d66 100644 --- a/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll +++ b/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll @@ -53,7 +53,7 @@ end3: !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"PIC Level", i32 2} !5 = !{!"clang version 3.9.0 "} -!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !7 = !DISubroutineType(types: !2) !8 = !DILocation(line: 1, column: 20, scope: !6) !9 = !DILocation(line: 2, column: 20, scope: !6) diff --git a/test/CodeGen/AArch64/arm64-spill-remarks.ll b/test/CodeGen/AArch64/arm64-spill-remarks.ll index cfebeb496e18..53a16ed748b2 100644 --- a/test/CodeGen/AArch64/arm64-spill-remarks.ll +++ 
b/test/CodeGen/AArch64/arm64-spill-remarks.ll @@ -135,7 +135,7 @@ end3: !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"PIC Level", i32 2} !5 = !{!"clang version 3.9.0 "} -!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !7 = !DISubroutineType(types: !2) !8 = !DILocation(line: 1, column: 20, scope: !6) !9 = !DILocation(line: 2, column: 20, scope: !6) diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll index cce5be8ff223..af234a9acf72 100644 --- a/test/CodeGen/AArch64/arm64-st1.ll +++ b/test/CodeGen/AArch64/arm64-st1.ll @@ -4,9 +4,28 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_16b -; CHECK: st1.b +; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 1 %tmp = extractelement <16 x i8> %A, i32 1 - store i8 %tmp, i8* %D + store i8 %tmp, i8* %ptr + ret void +} + +define void @st1lane0_16b(<16 x i8> %A, i8* %D) { +; CHECK-LABEL: st1lane0_16b +; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 1 + %tmp = extractelement <16 x i8> %A, i32 0 + store i8 %tmp, i8* %ptr + ret void +} + +define void @st1lane0u_16b(<16 x i8> %A, i8* %D) { +; CHECK-LABEL: st1lane0u_16b +; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 -1 + %tmp = extractelement <16 x i8> %A, i32 0 + store i8 %tmp, i8* %ptr ret void } @@ -32,9 +51,28 @@ define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) { define void @st1lane_8h(<8 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane_8h -; CHECK: st1.h +; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i16, i16* %D, i64 1 %tmp = 
extractelement <8 x i16> %A, i32 1 - store i16 %tmp, i16* %D + store i16 %tmp, i16* %ptr + ret void +} + +define void @st1lane0_8h(<8 x i16> %A, i16* %D) { +; CHECK-LABEL: st1lane0_8h +; CHECK: str h0, [x0, #2] + %ptr = getelementptr i16, i16* %D, i64 1 + %tmp = extractelement <8 x i16> %A, i32 0 + store i16 %tmp, i16* %ptr + ret void +} + +define void @st1lane0u_8h(<8 x i16> %A, i16* %D) { +; CHECK-LABEL: st1lane0u_8h +; CHECK: stur h0, [x0, #-2] + %ptr = getelementptr i16, i16* %D, i64 -1 + %tmp = extractelement <8 x i16> %A, i32 0 + store i16 %tmp, i16* %ptr ret void } @@ -59,9 +97,28 @@ define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) { define void @st1lane_4s(<4 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane_4s -; CHECK: st1.s +; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i32, i32* %D, i64 1 %tmp = extractelement <4 x i32> %A, i32 1 - store i32 %tmp, i32* %D + store i32 %tmp, i32* %ptr + ret void +} + +define void @st1lane0_4s(<4 x i32> %A, i32* %D) { +; CHECK-LABEL: st1lane0_4s +; CHECK: str s0, [x0, #4] + %ptr = getelementptr i32, i32* %D, i64 1 + %tmp = extractelement <4 x i32> %A, i32 0 + store i32 %tmp, i32* %ptr + ret void +} + +define void @st1lane0u_4s(<4 x i32> %A, i32* %D) { +; CHECK-LABEL: st1lane0u_4s +; CHECK: stur s0, [x0, #-4] + %ptr = getelementptr i32, i32* %D, i64 -1 + %tmp = extractelement <4 x i32> %A, i32 0 + store i32 %tmp, i32* %ptr ret void } @@ -86,9 +143,28 @@ define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) { define void @st1lane_4s_float(<4 x float> %A, float* %D) { ; CHECK-LABEL: st1lane_4s_float -; CHECK: st1.s +; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr float, float* %D, i64 1 %tmp = extractelement <4 x float> %A, i32 1 - store float %tmp, float* %D + store float %tmp, float* %ptr + ret void +} + +define void @st1lane0_4s_float(<4 x float> %A, float* %D) { +; CHECK-LABEL: st1lane0_4s_float +; CHECK: str s0, [x0, #4] + %ptr = getelementptr float, float* %D, i64 1 + 
%tmp = extractelement <4 x float> %A, i32 0 + store float %tmp, float* %ptr + ret void +} + +define void @st1lane0u_4s_float(<4 x float> %A, float* %D) { +; CHECK-LABEL: st1lane0u_4s_float +; CHECK: stur s0, [x0, #-4] + %ptr = getelementptr float, float* %D, i64 -1 + %tmp = extractelement <4 x float> %A, i32 0 + store float %tmp, float* %ptr ret void } @@ -113,9 +189,28 @@ define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) { define void @st1lane_2d(<2 x i64> %A, i64* %D) { ; CHECK-LABEL: st1lane_2d -; CHECK: st1.d +; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i64, i64* %D, i64 1 %tmp = extractelement <2 x i64> %A, i32 1 - store i64 %tmp, i64* %D + store i64 %tmp, i64* %ptr + ret void +} + +define void @st1lane0_2d(<2 x i64> %A, i64* %D) { +; CHECK-LABEL: st1lane0_2d +; CHECK: str d0, [x0, #8] + %ptr = getelementptr i64, i64* %D, i64 1 + %tmp = extractelement <2 x i64> %A, i32 0 + store i64 %tmp, i64* %ptr + ret void +} + +define void @st1lane0u_2d(<2 x i64> %A, i64* %D) { +; CHECK-LABEL: st1lane0u_2d +; CHECK: stur d0, [x0, #-8] + %ptr = getelementptr i64, i64* %D, i64 -1 + %tmp = extractelement <2 x i64> %A, i32 0 + store i64 %tmp, i64* %ptr ret void } @@ -140,9 +235,28 @@ define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) { define void @st1lane_2d_double(<2 x double> %A, double* %D) { ; CHECK-LABEL: st1lane_2d_double -; CHECK: st1.d +; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr double, double* %D, i64 1 %tmp = extractelement <2 x double> %A, i32 1 - store double %tmp, double* %D + store double %tmp, double* %ptr + ret void +} + +define void @st1lane0_2d_double(<2 x double> %A, double* %D) { +; CHECK-LABEL: st1lane0_2d_double +; CHECK: str d0, [x0, #8] + %ptr = getelementptr double, double* %D, i64 1 + %tmp = extractelement <2 x double> %A, i32 0 + store double %tmp, double* %ptr + ret void +} + +define void @st1lane0u_2d_double(<2 x double> %A, double* %D) { +; CHECK-LABEL: 
st1lane0u_2d_double +; CHECK: stur d0, [x0, #-8] + %ptr = getelementptr double, double* %D, i64 -1 + %tmp = extractelement <2 x double> %A, i32 0 + store double %tmp, double* %ptr ret void } @@ -167,9 +281,10 @@ define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) { define void @st1lane_8b(<8 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_8b -; CHECK: st1.b +; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 1 %tmp = extractelement <8 x i8> %A, i32 1 - store i8 %tmp, i8* %D + store i8 %tmp, i8* %ptr ret void } @@ -195,9 +310,28 @@ define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) { define void @st1lane_4h(<4 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane_4h -; CHECK: st1.h +; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i16, i16* %D, i64 1 %tmp = extractelement <4 x i16> %A, i32 1 - store i16 %tmp, i16* %D + store i16 %tmp, i16* %ptr + ret void +} + +define void @st1lane0_4h(<4 x i16> %A, i16* %D) { +; CHECK-LABEL: st1lane0_4h +; CHECK: str h0, [x0, #2] + %ptr = getelementptr i16, i16* %D, i64 1 + %tmp = extractelement <4 x i16> %A, i32 0 + store i16 %tmp, i16* %ptr + ret void +} + +define void @st1lane0u_4h(<4 x i16> %A, i16* %D) { +; CHECK-LABEL: st1lane0u_4h +; CHECK: stur h0, [x0, #-2] + %ptr = getelementptr i16, i16* %D, i64 -1 + %tmp = extractelement <4 x i16> %A, i32 0 + store i16 %tmp, i16* %ptr ret void } @@ -222,9 +356,28 @@ define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) { define void @st1lane_2s(<2 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane_2s -; CHECK: st1.s +; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr i32, i32* %D, i64 1 %tmp = extractelement <2 x i32> %A, i32 1 - store i32 %tmp, i32* %D + store i32 %tmp, i32* %ptr + ret void +} + +define void @st1lane0_2s(<2 x i32> %A, i32* %D) { +; CHECK-LABEL: st1lane0_2s +; CHECK: str s0, [x0, #4] + %ptr = getelementptr i32, i32* %D, i64 1 + %tmp = extractelement <2 x i32> %A, i32 0 + store i32 %tmp, i32* 
%ptr + ret void +} + +define void @st1lane0u_2s(<2 x i32> %A, i32* %D) { +; CHECK-LABEL: st1lane0u_2s +; CHECK: stur s0, [x0, #-4] + %ptr = getelementptr i32, i32* %D, i64 -1 + %tmp = extractelement <2 x i32> %A, i32 0 + store i32 %tmp, i32* %ptr ret void } @@ -249,9 +402,28 @@ define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) { define void @st1lane_2s_float(<2 x float> %A, float* %D) { ; CHECK-LABEL: st1lane_2s_float -; CHECK: st1.s +; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}] + %ptr = getelementptr float, float* %D, i64 1 %tmp = extractelement <2 x float> %A, i32 1 - store float %tmp, float* %D + store float %tmp, float* %ptr + ret void +} + +define void @st1lane0_2s_float(<2 x float> %A, float* %D) { +; CHECK-LABEL: st1lane0_2s_float +; CHECK: str s0, [x0, #4] + %ptr = getelementptr float, float* %D, i64 1 + %tmp = extractelement <2 x float> %A, i32 0 + store float %tmp, float* %ptr + ret void +} + +define void @st1lane0u_2s_float(<2 x float> %A, float* %D) { +; CHECK-LABEL: st1lane0u_2s_float +; CHECK: stur s0, [x0, #-4] + %ptr = getelementptr float, float* %D, i64 -1 + %tmp = extractelement <2 x float> %A, i32 0 + store float %tmp, float* %ptr ret void } @@ -274,6 +446,60 @@ define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) { ret void } +define void @st1lane0_1d(<1 x i64> %A, i64* %D) { +; CHECK-LABEL: st1lane0_1d +; CHECK: str d0, [x0, #8] + %ptr = getelementptr i64, i64* %D, i64 1 + %tmp = extractelement <1 x i64> %A, i32 0 + store i64 %tmp, i64* %ptr + ret void +} + +define void @st1lane0u_1d(<1 x i64> %A, i64* %D) { +; CHECK-LABEL: st1lane0u_1d +; CHECK: stur d0, [x0, #-8] + %ptr = getelementptr i64, i64* %D, i64 -1 + %tmp = extractelement <1 x i64> %A, i32 0 + store i64 %tmp, i64* %ptr + ret void +} + +define void @st1lane0_ro_1d(<1 x i64> %A, i64* %D, i64 %offset) { +; CHECK-LABEL: st1lane0_ro_1d +; CHECK: str d0, [x0, x1, lsl #3] + %ptr = getelementptr i64, i64* %D, i64 %offset + %tmp = extractelement <1 x i64> %A, i32 
0 + store i64 %tmp, i64* %ptr + ret void +} + +define void @st1lane0_1d_double(<1 x double> %A, double* %D) { +; CHECK-LABEL: st1lane0_1d_double +; CHECK: str d0, [x0, #8] + %ptr = getelementptr double, double* %D, i64 1 + %tmp = extractelement <1 x double> %A, i32 0 + store double %tmp, double* %ptr + ret void +} + +define void @st1lane0u_1d_double(<1 x double> %A, double* %D) { +; CHECK-LABEL: st1lane0u_1d_double +; CHECK: stur d0, [x0, #-8] + %ptr = getelementptr double, double* %D, i64 -1 + %tmp = extractelement <1 x double> %A, i32 0 + store double %tmp, double* %ptr + ret void +} + +define void @st1lane0_ro_1d_double(<1 x double> %A, double* %D, i64 %offset) { +; CHECK-LABEL: st1lane0_ro_1d_double +; CHECK: str d0, [x0, x1, lsl #3] + %ptr = getelementptr double, double* %D, i64 %offset + %tmp = extractelement <1 x double> %A, i32 0 + store double %tmp, double* %ptr + ret void +} + define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) { ; CHECK-LABEL: st2lane_16b ; CHECK: st2.b diff --git a/test/CodeGen/AArch64/arm64-stp-aa.ll b/test/CodeGen/AArch64/arm64-stp-aa.ll index 5b34017cf36a..79c8ec70fcdd 100644 --- a/test/CodeGen/AArch64/arm64-stp-aa.ll +++ b/test/CodeGen/AArch64/arm64-stp-aa.ll @@ -112,7 +112,7 @@ define double @stp_double_aa_after(double %d0, double %a, double %b, double* noc ; Check that the stores %c and %d are paired after the fadd instruction, ; and then the stores %a and %d are paired after proving that they do not -; depend on the the (%c, %d) pair. +; depend on the (%c, %d) pair. 
; ; CHECK-LABEL: st1: ; CHECK: stp q0, q1, [x{{[0-9]+}}] diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll index 9239077c166b..d3fd87e5a898 100644 --- a/test/CodeGen/AArch64/arm64-stp.ll +++ b/test/CodeGen/AArch64/arm64-stp.ll @@ -105,8 +105,8 @@ define void @splat_v4i32(i32 %v, i32 *%p) { entry: ; CHECK-LABEL: splat_v4i32 -; CHECK-DAG: stp w0, w0, [x1] -; CHECK-DAG: stp w0, w0, [x1, #8] +; CHECK-DAG: dup v0.4s, w0 +; CHECK-DAG: str q0, [x1] ; CHECK: ret %p17 = insertelement <4 x i32> undef, i32 %v, i32 0 @@ -129,8 +129,7 @@ entry: ; CHECK-DAG: mov v[[REG1]].s[1], w0 ; CHECK-DAG: mov v[[REG1]].s[2], w0 ; CHECK-DAG: mov v[[REG1]].s[3], w0 -; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8 -; CHECK: stp d[[REG1]], d[[REG2]], [x1] +; CHECK: str q[[REG1]], [x1] ; CHECK: ret %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v @@ -151,8 +150,7 @@ entry: ; CHECK: mov v[[REG1]].s[1], w0 ; CHECK-DAG: mov v[[REG1]].s[2], w0 ; CHECK-DAG: mov v[[REG1]].s[3], w0 -; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8 -; CHECK: stp d[[REG1]], d[[REG2]], [x1] +; CHECK: str q[[REG1]], [x1] ; CHECK: ret %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1 diff --git a/test/CodeGen/AArch64/arm64-stur.ll b/test/CodeGen/AArch64/arm64-stur.ll index 4a3229a39b50..d4ac3630bc1a 100644 --- a/test/CodeGen/AArch64/arm64-stur.ll +++ b/test/CodeGen/AArch64/arm64-stur.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s %struct.X = type <{ i32, i64, i64 }> define void @foo1(i32* %p, i64 %val) nounwind { @@ -55,11 +55,11 @@ define void @foo(%struct.X* nocapture %p) nounwind optsize ssp { ; CHECK-NEXT: ret %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1 %val = bitcast i64* %B to i8* - call void @llvm.memset.p0i8.i64(i8* %val, i8 0, 
i64 16, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; Unaligned 16b stores are split into 8b stores for performance. ; radar://15424193 diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll index bb9ad46ba63d..9f77d3527d4b 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: llc -O0 -fast-isel -mtriple=arm64-none-linux-gnu -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=NOEMU %s -; RUN: llc -emulated-tls -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: llc -emulated-tls -O0 -fast-isel -mtriple=arm64-none-linux-gnu -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=EMU %s ; If the .tlsdesccall and blr parts are emitted completely separately (even with diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll index 6b754b0a169e..53669a15b9ec 100644 --- a/test/CodeGen/AArch64/arm64-vabs.ll +++ b/test/CodeGen/AArch64/arm64-vabs.ll @@ -219,6 +219,40 @@ declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) noun declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone +define <2 x float> @fabd_2s_from_fsub_fabs(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK-LABEL: fabd_2s_from_fsub_fabs: +;CHECK: fabd.2s + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B + %sub = fsub <2 x float> %tmp1, %tmp2 + %abs = call 
<2 x float> @llvm.fabs.v2f32(<2 x float> %sub) + ret <2 x float> %abs +} + +define <4 x float> @fabd_4s_from_fsub_fabs(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK-LABEL: fabd_4s_from_fsub_fabs: +;CHECK: fabd.4s + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %sub = fsub <4 x float> %tmp1, %tmp2 + %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %sub) + ret <4 x float> %abs +} + +define <2 x double> @fabd_2d_from_fsub_fabs(<2 x double>* %A, <2 x double>* %B) nounwind { +;CHECK-LABEL: fabd_2d_from_fsub_fabs: +;CHECK: fabd.2d + %tmp1 = load <2 x double>, <2 x double>* %A + %tmp2 = load <2 x double>, <2 x double>* %B + %sub = fsub <2 x double> %tmp1, %tmp2 + %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %sub) + ret <2 x double> %abs +} + +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone + define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sabd_8b: ;CHECK: sabd.8b @@ -829,6 +863,25 @@ define double @fabdd(double %a, double %b) nounwind { declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone +define float @fabds_from_fsub_fabs(float %a, float %b) nounwind { +; CHECK-LABEL: fabds_from_fsub_fabs: +; CHECK: fabd s0, s0, s1 + %sub = fsub float %a, %b + %abs = tail call float @llvm.fabs.f32(float %sub) + ret float %abs +} + +define double @fabdd_from_fsub_fabs(double %a, double %b) nounwind { +; CHECK-LABEL: fabdd_from_fsub_fabs: +; CHECK: fabd d0, d0, d1 + %sub = fsub double %a, %b + %abs = tail call double @llvm.fabs.f64(double %sub) + ret double %abs +} + +declare float @llvm.fabs.f32(float) nounwind readnone +declare double @llvm.fabs.f64(double) nounwind readnone + define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) 
{ ; CHECK-LABEL: uabdl_from_extract_dup: ; CHECK-NOT: ext.16b diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 0f8f4c5d4a44..09125293cac2 100644 --- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -32,8 +32,8 @@ define void @test_simple(i32 %n, ...) { ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128 ; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: mov [[GRVR:x[0-9]+]], #-545460846720 -; CHECK: movk [[GRVR]], #65480 +; CHECK: mov [[GRVR:x[0-9]+]], #-56 +; CHECK: movk [[GRVR]], #65408, lsl #32 ; CHECK: str [[GRVR]], [x[[VA_LIST]], #24] %addr = bitcast %va_list* @var to i8* @@ -130,12 +130,9 @@ define void @test_va_copy() { ; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var -; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]] +; CHECK: ldp [[BLOCK:q[0-9]+]], [[BLOCK:q[0-9]+]], [x[[SRC]]] ; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list -; CHECK: str [[BLOCK]], [x[[DST]]] - -; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]], #16] -; CHECK: str [[BLOCK]], [x[[DST]], #16] +; CHECK: stp [[BLOCK:q[0-9]+]], [[BLOCK:q[0-9]+]], [x[[DST]]] ret void ; CHECK: ret } diff --git a/test/CodeGen/AArch64/arm64-vcvt_f.ll b/test/CodeGen/AArch64/arm64-vcvt_f.ll index 254671a3c3c5..90cc2d37882c 100644 --- a/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -O0 -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f64_f32: diff --git a/test/CodeGen/AArch64/arm64-vector-insertion.ll b/test/CodeGen/AArch64/arm64-vector-insertion.ll index 7d72b489c3be..23f9d9060344 100644 --- a/test/CodeGen/AArch64/arm64-vector-insertion.ll +++ 
b/test/CodeGen/AArch64/arm64-vector-insertion.ll @@ -8,7 +8,7 @@ entry: ret void ; CHECK-LABEL: test0f - ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000 + ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0 ; CHECK: mov.s v[[TEMP]][0], v{{[0-9]+}}[0] ; CHECK: str q[[TEMP]], [x0] ; CHECK: ret @@ -16,7 +16,6 @@ entry: } - define void @test1f(float* nocapture %x, float %a) #0 { entry: %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0 @@ -25,9 +24,8 @@ entry: ret void ; CHECK-LABEL: test1f - ; CHECK: fmov s[[TEMP:[0-9]+]], #1.0000000 - ; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0] - ; CHECK: mov.s v[[TEMP2]][0], v0[0] - ; CHECK: str q[[TEMP2]], [x0] + ; CHECK: fmov.4s v[[TEMP:[0-9]+]], #1.0 + ; CHECK: mov.s v[[TEMP]][0], v0[0] + ; CHECK: str q[[TEMP]], [x0] ; CHECK: ret } diff --git a/test/CodeGen/AArch64/arm64-vector-ldst.ll b/test/CodeGen/AArch64/arm64-vector-ldst.ll index 938b3d1d0593..6e530cb258b7 100644 --- a/test/CodeGen/AArch64/arm64-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll @@ -264,149 +264,196 @@ entry: ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q ; registers for unscaled vector accesses -@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1 -define <1 x i64> @fct0() nounwind readonly ssp { +define <1 x i64> @fct0(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct0: ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <1 x i64>* + %0 = load <1 x i64>, <1 x i64>* %q, align 8 ret <1 x i64> %0 } -define <2 x i32> @fct1() nounwind readonly ssp { +define <2 x i32> @fct1(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct1: ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <2 x i32>, 
<2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <2 x i32>* + %0 = load <2 x i32>, <2 x i32>* %q, align 8 ret <2 x i32> %0 } -define <4 x i16> @fct2() nounwind readonly ssp { +define <4 x i16> @fct2(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct2: ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <4 x i16>* + %0 = load <4 x i16>, <4 x i16>* %q, align 8 ret <4 x i16> %0 } -define <8 x i8> @fct3() nounwind readonly ssp { +define <8 x i8> @fct3(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct3: ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <8 x i8>* + %0 = load <8 x i8>, <8 x i8>* %q, align 8 ret <8 x i8> %0 } -define <2 x i64> @fct4() nounwind readonly ssp { +define <2 x i64> @fct4(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct4: ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <2 x i64>* + %0 = load <2 x i64>, <2 x i64>* %q, align 16 ret <2 x i64> %0 } -define <4 x i32> @fct5() nounwind readonly ssp { +define <4 x i32> @fct5(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct5: ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16 + %p = 
getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <4 x i32>* + %0 = load <4 x i32>, <4 x i32>* %q, align 16 ret <4 x i32> %0 } -define <8 x i16> @fct6() nounwind readonly ssp { +define <8 x i16> @fct6(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct6: ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <8 x i16>* + %0 = load <8 x i16>, <8 x i16>* %q, align 16 ret <8 x i16> %0 } -define <16 x i8> @fct7() nounwind readonly ssp { +define <16 x i8> @fct7(i8* %str) nounwind readonly ssp { entry: ; CHECK-LABEL: fct7: ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3] - %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <16 x i8>* + %0 = load <16 x i8>, <16 x i8>* %q, align 16 ret <16 x i8> %0 } -define void @fct8() nounwind ssp { +define void @fct8(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct8: ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8 - store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <1 x i64>* + %0 = load <1 x i64>, <1 x i64>* %q, align 8 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <1 x i64>* + store <1 x i64> %0, <1 x i64>* %q2, align 8 ret void } -define void @fct9() nounwind ssp { +define void @fct9(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct9: ; CHECK: ldur 
[[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8 - store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <2 x i32>* + %0 = load <2 x i32>, <2 x i32>* %q, align 8 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <2 x i32>* + store <2 x i32> %0, <2 x i32>* %q2, align 8 ret void } -define void @fct10() nounwind ssp { +define void @fct10(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct10: ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8 - store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <4 x i16>* + %0 = load <4 x i16>, <4 x i16>* %q, align 8 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <4 x i16>* + store <4 x i16> %0, <4 x i16>* %q2, align 8 ret void } -define void @fct11() nounwind ssp { +define void @fct11(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct11: ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8 - store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to 
<8 x i8>* + %0 = load <8 x i8>, <8 x i8>* %q, align 8 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <8 x i8>* + store <8 x i8> %0, <8 x i8>* %q2, align 8 ret void } -define void @fct12() nounwind ssp { +define void @fct12(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct12: ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16 - store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <2 x i64>* + %0 = load <2 x i64>, <2 x i64>* %q, align 16 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <2 x i64>* + store <2 x i64> %0, <2 x i64>* %q2, align 16 ret void } -define void @fct13() nounwind ssp { +define void @fct13(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct13: ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16 - store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <4 x i32>* + %0 = load <4 x i32>, <4 x i32>* %q, align 16 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <4 x i32>* + store <4 x i32> %0, <4 x i32>* %q2, align 16 ret void } -define void @fct14() nounwind ssp { +define void @fct14(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct14: ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load 
<8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16 - store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <8 x i16>* + %0 = load <8 x i16>, <8 x i16>* %q, align 16 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <8 x i16>* + store <8 x i16> %0, <8 x i16>* %q2, align 16 ret void } -define void @fct15() nounwind ssp { +define void @fct15(i8* %str) nounwind ssp { entry: ; CHECK-LABEL: fct15: ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3] ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4] - %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16 - store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16 + %p = getelementptr inbounds i8, i8* %str, i64 3 + %q = bitcast i8* %p to <16 x i8>* + %0 = load <16 x i8>, <16 x i8>* %q, align 16 + %p2 = getelementptr inbounds i8, i8* %str, i64 4 + %q2 = bitcast i8* %p2 to <16 x i8>* + store <16 x i8> %0, <16 x i8>* %q2, align 16 ret void } diff --git a/test/CodeGen/AArch64/arm64-virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll index 4ecfde4f83e2..cac105ee4c09 100644 --- a/test/CodeGen/AArch64/arm64-virtual_base.ll +++ b/test/CodeGen/AArch64/arm64-virtual_base.ll @@ -34,8 +34,8 @@ define void @Precompute_Patch_Values(%struct.Bicubic_Patch_Struct* %Shape) { ; CHECK: Precompute_Patch_Values ; CHECK: ldr [[VAL:x[0-9]+]], [x0, #288] -; CHECK-NEXT: str [[VAL]], [sp, #232] ; CHECK-NEXT: ldr [[VAL2:q[0-9]+]], [x0, #272] +; CHECK-NEXT: str [[VAL]], [sp, #232] ; CHECK-NEXT: stur [[VAL2]], {{\[}}sp, #216] entry: %Control_Points = alloca [16 x [3 x double]], align 8 @@ -43,9 +43,9 @@ entry: %tmp14 = 
bitcast double* %arraydecay5.3.1 to i8* %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0 %tmp15 = bitcast double* %arraydecay11.3.1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i1 false) ret void } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll b/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll index c56d607aa812..60a62030e44b 100644 --- a/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll +++ b/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll @@ -4,8 +4,10 @@ define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind ssp { entry: ; CHECK-LABEL: t: -; CHECK: mov x0, [[REG1:x[0-9]+]] -; CHECK: mov x1, [[REG2:x[0-9]+]] +; CHECK: mov [[REG2:x[0-9]+]], x3 +; CHECK: mov [[REG1:x[0-9]+]], x2 +; CHECK: mov x0, x2 +; CHECK: mov x1, x3 ; CHECK: bl _foo ; CHECK: mov x0, [[REG1]] ; CHECK: mov x1, [[REG2]] diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index 2fb9d3b2d030..664078fb7e94 100644 --- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -1,27 +1,31 @@ -; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefix=FALKOR --check-prefix=ALL +; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE +; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone 
-mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16 +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS -; rdar://11481771 -; rdar://13713797 +declare void @bar(half, float, double, <2 x double>) +declare void @bari(i32, i32) +declare void @barl(i64, i64) +declare void @barf(float, float) define void @t1() nounwind ssp { entry: ; ALL-LABEL: t1: ; ALL-NOT: fmov -; CYCLONE: fmov d0, xzr -; CYCLONE: fmov d1, xzr +; ALL: ldr h0,{{.*}} +; CYCLONE: fmov s1, wzr ; CYCLONE: fmov d2, xzr -; CYCLONE: fmov d3, xzr -; KRYO: movi v0.2d, #0000000000000000 -; KRYO: movi v1.2d, #0000000000000000 -; KRYO: movi v2.2d, #0000000000000000 -; KRYO: movi v3.2d, #0000000000000000 -; FALKOR: movi v0.2d, #0000000000000000 -; FALKOR: movi v1.2d, #0000000000000000 -; FALKOR: movi v2.2d, #0000000000000000 -; FALKOR: movi v3.2d, #0000000000000000 - tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind +; CYCLONE: movi.16b v3, #0 +; CYCLONE-FULLFP16: fmov h0, wzr +; CYCLONE-FULLFP16: fmov s1, wzr +; CYCLONE-FULLFP16: fmov d2, xzr +; CYCLONE-FULLFP16: movi.16b v3, #0 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 + tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind ret void } @@ -29,8 +33,8 @@ define void @t2() nounwind ssp { entry: ; ALL-LABEL: t2: ; ALL-NOT: mov w0, wzr -; ALL: mov w0, #0 -; ALL: mov w1, #0 +; ALL: mov w{{[0-3]+}}, #0 +; ALL: mov w{{[0-3]+}}, #0 tail call void 
@bari(i32 0, i32 0) nounwind ret void } @@ -39,8 +43,8 @@ define void @t3() nounwind ssp { entry: ; ALL-LABEL: t3: ; ALL-NOT: mov x0, xzr -; ALL: mov x0, #0 -; ALL: mov x1, #0 +; ALL: mov x{{[0-3]+}}, #0 +; ALL: mov x{{[0-3]+}}, #0 tail call void @barl(i64 0, i64 0) nounwind ret void } @@ -48,26 +52,21 @@ entry: define void @t4() nounwind ssp { ; ALL-LABEL: t4: ; ALL-NOT: fmov -; CYCLONE: fmov s0, wzr -; CYCLONE: fmov s1, wzr -; KRYO: movi v0.2d, #0000000000000000 -; KRYO: movi v1.2d, #0000000000000000 -; FALKOR: movi v0.2d, #0000000000000000 -; FALKOR: movi v1.2d, #0000000000000000 +; CYCLONE: fmov s{{[0-3]+}}, wzr +; CYCLONE: fmov s{{[0-3]+}}, wzr +; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr +; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind ret void } -declare void @bar(double, double, double, double) -declare void @bari(i32, i32) -declare void @barl(i64, i64) -declare void @barf(float, float) - ; We used to produce spills+reloads for a Q register with zero cycle zeroing ; enabled. 
; ALL-LABEL: foo: -; ALL-NOT: str {{q[0-9]+}} -; ALL-NOT: ldr {{q[0-9]+}} +; ALL-NOT: str q{{[0-9]+}} +; ALL-NOT: ldr q{{[0-9]+}} define double @foo(i32 %n) { entry: br label %for.body @@ -90,8 +89,7 @@ for.end: define <2 x i64> @t6() { ; ALL-LABEL: t6: ; CYCLONE: movi.16b v0, #0 -; KRYO: movi v0.2d, #0000000000000000 -; FALKOR: movi v0.2d, #0000000000000000 +; OTHERS: movi v0.2d, #0000000000000000 ret <2 x i64> zeroinitializer } diff --git a/test/CodeGen/AArch64/atomic-ops-lse.ll b/test/CodeGen/AArch64/atomic-ops-lse.ll index 49f716547b12..22f2c8553534 100644 --- a/test/CodeGen/AArch64/atomic-ops-lse.ll +++ b/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s --check-prefix=CHECK-REG ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=saphira < %s | FileCheck %s @@ -11,6 +12,7 @@ @var16 = global i16 0 @var32 = global i32 0 @var64 = global i64 0 +@var128 = global i128 0 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8: @@ -629,12 +631,27 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK-NEXT: casab w0, w1, [x[[ADDR]]] +; CHECK-NEXT: ret + + ret i8 %old +} + +define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i8_1: + %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire + %success = extractvalue { i8, i1 } %pair, 1 -; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], 
[x[[ADDR]]] ; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - ret i8 %old +; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cmp w[[NEW]], w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + ret i1 %success } define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { @@ -644,12 +661,28 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK-NEXT: casah w0, w1, [x[[ADDR]]] +; CHECK-NEXT: ret + + ret i16 %old +} + +define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i16_1: + %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire + %success = extractvalue { i16, i1 } %pair, 1 -; CHECK: casah w0, w1, [x[[ADDR]]] ; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - ret i16 %old +; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cmp w[[NEW]], w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + + ret i1 %success } define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { @@ -682,6 +715,21 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ret i64 %old } +define i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i128: + %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire + %old = extractvalue { i128, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 + +; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i128 %old +} + define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK-LABEL: 
test_atomic_load_sub_i8: %old = atomicrmw sub i8* @var8, i8 %offset seq_cst @@ -766,6 +814,118 @@ define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind { ret void } +define i8 @test_atomic_load_sub_i8_neg_imm() nounwind { +; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm: + %old = atomicrmw sub i8* @var8, i8 -1 seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1 +; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_sub_i16_neg_imm() nounwind { +; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm: + %old = atomicrmw sub i16* @var16, i16 -1 seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1 +; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_sub_i32_neg_imm() nounwind { +; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm: + %old = atomicrmw sub i32* @var32, i32 -1 seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1 +; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_sub_i64_neg_imm() nounwind { +; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm: + %old = atomicrmw sub i64* @var64, i64 -1 seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1 +; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + +define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg: + %neg = sub i8 0, 
%offset + %old = atomicrmw sub i8* @var8, i8 %neg seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i8 %old +} + +define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg: + %neg = sub i16 0, %offset + %old = atomicrmw sub i16* @var16, i16 %neg seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i16 %old +} + +define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg: + %neg = sub i32 0, %offset + %old = atomicrmw sub i32* @var32, i32 %neg seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i32 %old +} + +define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg: + %neg = sub i64 0, %offset + %old = atomicrmw sub i64* @var64, i64 %neg seq_cst + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i64 %old +} + define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8: %old = atomicrmw and i8* @var8, i8 %offset seq_cst @@ -818,6 +978,102 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ret i64 %old } +define i8 @test_atomic_load_and_i8_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i8_inv_imm: + %old = atomicrmw and i8* @var8, i8 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add 
x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i8 %old +} + +define i16 @test_atomic_load_and_i16_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i16_inv_imm: + %old = atomicrmw and i16* @var16, i16 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i16 %old +} + +define i32 @test_atomic_load_and_i32_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i32_inv_imm: + %old = atomicrmw and i32* @var32, i32 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i32 %old +} + +define i64 @test_atomic_load_and_i64_inv_imm() nounwind { +; CHECK-LABEL: test_atomic_load_and_i64_inv_imm: + %old = atomicrmw and i64* @var64, i64 -2 seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: orr w[[CONST:[0-9]+]], wzr, #0x1 +; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i64 %old +} + +define i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i8_inv_arg: + %inv = xor i8 %offset, -1 + %old = atomicrmw and i8* @var8, i8 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: ldclralb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i8 %old +} + +define i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i16_inv_arg: + %inv = xor i16 
%offset, -1 + %old = atomicrmw and i16* @var16, i16 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: ldclralh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i16 %old +} + +define i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i32_inv_arg: + %inv = xor i32 %offset, -1 + %old = atomicrmw and i32* @var32, i32 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: ldclral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i32 %old +} + +define i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i64_inv_arg: + %inv = xor i64 %offset, -1 + %old = atomicrmw and i64* @var64, i64 %inv seq_cst +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: ldclral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] +; CHECK-NOT: dmb + ret i64 %old +} + define void @test_atomic_load_and_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret: atomicrmw and i32* @var32, i32 %offset seq_cst @@ -1674,6 +1930,21 @@ define i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind { ret i64 %old } +define i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire: + %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire + %old = extractvalue { i128, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 + +; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i128 %old +} + define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic: %pair = cmpxchg i8* @var8, 
i8 %wanted, i8 %new monotonic monotonic @@ -1734,6 +2005,21 @@ define i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind { ret i64 %old } +define i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic: + %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new monotonic monotonic + %old = extractvalue { i128, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 + +; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i128 %old +} + define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst: %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst seq_cst @@ -1794,6 +2080,21 @@ define i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind { ret i64 %old } +define i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst: + %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new seq_cst seq_cst + %old = extractvalue { i128, i1 } %pair, 0 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 + +; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]] +; CHECK-NOT: dmb + + ret i128 %old +} + define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_acq_rel: %old = atomicrmw max i8* @var8, i8 %offset acq_rel diff --git a/test/CodeGen/AArch64/big-callframe.ll b/test/CodeGen/AArch64/big-callframe.ll new file mode 100644 index 000000000000..d5ee233095ca --- /dev/null +++ b/test/CodeGen/AArch64/big-callframe.ll @@ -0,0 +1,16 @@ +; RUN: llc -o - %s -verify-machineinstrs | FileCheck %s +; XFAIL: * +; Make sure we use a frame pointer and fp relative addressing for the emergency +; spillslot when we have gigantic callframes. 
+; CHECK-LABEL: func: +; CHECK: stur {{.*}}, [x29, #{{.*}}] // 8-byte Folded Spill +; CHECK: ldur {{.*}}, [x29, #{{.*}}] // 8-byte Folded Reload +target triple = "aarch64--" +declare void @extfunc([4096 x i64]* byval %p) +define void @func([4096 x i64]* %z) { + %lvar = alloca [31 x i8] + %v = load volatile [31 x i8], [31 x i8]* %lvar + store volatile [31 x i8] %v, [31 x i8]* %lvar + call void @extfunc([4096 x i64]* byval %z) + ret void +} diff --git a/test/CodeGen/AArch64/bitfield-extract.ll b/test/CodeGen/AArch64/bitfield-extract.ll index 5e727b669e22..69faf467d078 100644 --- a/test/CodeGen/AArch64/bitfield-extract.ll +++ b/test/CodeGen/AArch64/bitfield-extract.ll @@ -96,3 +96,20 @@ define void @test11(i64 %a) { } declare void @use(i16 signext, i64) + +; CHECK-LABEL: test_complex_node: +; CHECK: ldr d0, [x0], #8 +; CHECK: ubfx x[[VAL:[0-9]+]], x0, #5, #27 +; CHECK: str w[[VAL]], [x2] +define <2 x i32> @test_complex_node(<2 x i32>* %addr, <2 x i32>** %addr2, i32* %bf ) { + %vec = load <2 x i32>, <2 x i32>* %addr + + %vec.next = getelementptr <2 x i32>, <2 x i32>* %addr, i32 1 + store <2 x i32>* %vec.next, <2 x i32>** %addr2 + %lo = ptrtoint <2 x i32>* %vec.next to i32 + + %val = lshr i32 %lo, 5 + store i32 %val, i32* %bf + + ret <2 x i32> %vec +} diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index 42b0051a2dd6..3f5841c52fd8 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -137,7 +137,7 @@ define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { ret void } -; Neither mask is is a contiguous set of 1s. BFI can't be used +; Neither mask is a contiguous set of 1s. 
BFI can't be used define void @test_32bit_badmask(i32 *%existing, i32 *%new) { ; CHECK-LABEL: test_32bit_badmask: ; CHECK-NOT: bfi @@ -480,3 +480,20 @@ define i32 @test9(i64 %b, i32 %e) { %h = or i32 %g, %f ret i32 %h } + +; CHECK-LABEL: test_complex_type: +; CHECK: ldr d0, [x0], #8 +; CHECK: orr [[BOTH:x[0-9]+]], x0, x1, lsl #32 +; CHECK: str [[BOTH]], [x2] +define <2 x i32> @test_complex_type(<2 x i32>* %addr, i64 %in, i64* %bf ) { + %vec = load <2 x i32>, <2 x i32>* %addr + + %vec.next = getelementptr <2 x i32>, <2 x i32>* %addr, i32 1 + %lo = ptrtoint <2 x i32>* %vec.next to i64 + + %hi = shl i64 %in, 32 + %both = or i64 %lo, %hi + store i64 %both, i64* %bf + + ret <2 x i32> %vec +} diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 8bd1279544b8..4b60f171f4e4 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -31,7 +31,7 @@ define void @test_extendb64(i8 %var) { ; correct. %uxt64 = zext i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff ret void } @@ -63,7 +63,7 @@ define void @test_extendh64(i16 %var) { ; correct. 
%uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff ret void } diff --git a/test/CodeGen/AArch64/br-cond-not-merge.ll b/test/CodeGen/AArch64/br-cond-not-merge.ll index bf21ef307905..46532386783f 100644 --- a/test/CodeGen/AArch64/br-cond-not-merge.ll +++ b/test/CodeGen/AArch64/br-cond-not-merge.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s -; RUN: llc -mtriple=aarch64 -verify-machineinstrs -O0 -fast-isel=0 < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s +; RUN: llc -mtriple=aarch64 -verify-machineinstrs -O0 -fast-isel=0 -global-isel=false < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s declare void @foo() diff --git a/test/CodeGen/AArch64/branch-folder-oneinst.mir b/test/CodeGen/AArch64/branch-folder-oneinst.mir new file mode 100644 index 000000000000..cfb6da3e9ae7 --- /dev/null +++ b/test/CodeGen/AArch64/branch-folder-oneinst.mir @@ -0,0 +1,29 @@ +# RUN: llc -o - %s -mtriple=aarch64 -run-pass branch-folder -verify-machineinstrs | FileCheck %s +# Check that BranchFolding pass is able to hoist a common instruction into a block with a single branch instruction. +name: func +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: func + ; CHECK-LABEL: bb.0: + ; CHECK: $x0 = ADDXri $x0, 1, 0 + ; CHECK: CBZX $x1, %bb.2 + liveins: $x0, $x1 + CBZX $x1, %bb.2 + + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK-NOT: $x0 = ADDXri $x0, 1, 0 + liveins: $x0 + $x0 = ADDXri $x0, 1, 0 + $x0 = ADDXri $x0, 2, 0 + RET_ReallyLR implicit $x0 + + bb.2: + ; CHECK-LABEL: bb.2: + ; CHECK-NOT: $x0 = ADDXri $x0, 1, 0 + liveins: $x0 + $x0 = ADDXri $x0, 1, 0 + $x0 = ADDXri $x0, 3, 0 + RET_ReallyLR implicit $x0 +... 
diff --git a/test/CodeGen/AArch64/build-one-lane.ll b/test/CodeGen/AArch64/build-one-lane.ll new file mode 100644 index 000000000000..55225975c515 --- /dev/null +++ b/test/CodeGen/AArch64/build-one-lane.ll @@ -0,0 +1,272 @@ +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +; Check that building up a vector w/ only one non-zero lane initializes +; efficiently. + +define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind { + %v = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 7 + ret <8 x i8> %v + +; CHECK-LABEL: v8i8z +; CHECK: movi d[[R:[0-9]+]], #0 +; CHECK: mov v[[R]].b[7], w{{[0-9]+}} +} + +define <16 x i8> @v16i8z(i8 %t, i8 %s) nounwind { + %v = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 15 + ret <16 x i8> %v + +; CHECK-LABEL: v16i8z: +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].b[15], w{{[0-9]+}} +} + +define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind { + %v = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 3 + ret <4 x i16> %v + +; CHECK-LABEL: v4i16z: +; CHECK: movi d[[R:[0-9]+]], #0 +; CHECK: mov v[[R]].h[3], w{{[0-9]+}} +} + +define <8 x i16> @v8i16z(i16 %t, i16 %s) nounwind { + %v = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 7 + ret <8 x i16> %v + +; CHECK-LABEL: v8i16z: +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].h[7], w{{[0-9]+}} +} + +define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind { + %v = insertelement <2 x i32> <i32 0, i32 undef>, i32 %s, i32 1 + ret <2 x i32> %v + +; CHECK-LABEL: v2i32z: +; CHECK: movi d[[R:[0-9]+]], #0 +; CHECK: mov v[[R]].s[1], w{{[0-9]+}} +} + +define <4 x i32> @v4i32z(i32 %t, i32 %s) nounwind { + %v = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, i32 %s, i32 3 + ret <4 x i32> %v + +; CHECK-LABEL: v4i32z: +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].s[3], w{{[0-9]+}} +} + 
+define <2 x i64> @v2i64z(i64 %t, i64 %s) nounwind { + %v = insertelement <2 x i64> <i64 0, i64 undef>, i64 %s, i32 1 + ret <2 x i64> %v + +; CHECK-LABEL: v2i64z: +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].d[1], x{{[0-9]+}} +} + +define <2 x float> @v2f32z(float %t, float %s) nounwind { + %v = insertelement <2 x float> <float 0.0, float undef>, float %s, i32 1 + ret <2 x float> %v + +; CHECK-LABEL: v2f32z: +; CHECK: movi d[[R:[0-9]+]], #0 +; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0] +} + +define <4 x float> @v4f32z(float %t, float %s) nounwind { + %v = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float undef>, float %s, i32 3 + ret <4 x float> %v + +; CHECK-LABEL: v4f32z: +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0] +} + +define <2 x double> @v2f64z(double %t, double %s) nounwind { + %v = insertelement <2 x double> <double 0.0, double undef>, double %s, i32 1 + ret <2 x double> %v + +; CHECK-LABEL: v2f64z: +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0] +} + +; Check that building up a vector w/ only one non-ones lane initializes +; efficiently. 
+ +define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind { + %v = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 7 + ret <8 x i8> %v + +; CHECK-LABEL: v8i8m +; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff +; CHECK: mov v[[R]].b[7], w{{[0-9]+}} +} + +define <16 x i8> @v16i8m(i8 %t, i8 %s) nounwind { + %v = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 15 + ret <16 x i8> %v + +; CHECK-LABEL: v16i8m +; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff +; CHECK: mov v[[R]].b[15], w{{[0-9]+}} +} + +define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind { + %v = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 3 + ret <4 x i16> %v + +; CHECK-LABEL: v4i16m +; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff +; CHECK: mov v[[R]].h[3], w{{[0-9]+}} +} + +define <8 x i16> @v8i16m(i16 %t, i16 %s) nounwind { + %v = insertelement <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 7 + ret <8 x i16> %v + +; CHECK-LABEL: v8i16m +; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff +; CHECK: mov v[[R]].h[7], w{{[0-9]+}} +} + +define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind { + %v = insertelement <2 x i32> <i32 -1, i32 undef>, i32 %s, i32 1 + ret <2 x i32> %v + +; CHECK-LABEL: v2i32m +; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff +; CHECK: mov v[[R]].s[1], w{{[0-9]+}} +} + +define <4 x i32> @v4i32m(i32 %t, i32 %s) nounwind { + %v = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, i32 %s, i32 3 + ret <4 x i32> %v + +; CHECK-LABEL: v4i32m +; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff +; CHECK: mov v[[R]].s[3], w{{[0-9]+}} +} + +define <2 x i64> @v2i64m(i64 %t, i64 %s) nounwind { + %v = insertelement <2 x i64> <i64 -1, i64 undef>, i64 %s, i32 1 + ret <2 x i64> %v + +; CHECK-LABEL: v2i64m +; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff +; CHECK: mov v[[R]].d[1], 
x{{[0-9]+}} +} + +; Check that building up a vector w/ some constants initializes efficiently. + +define void @v8i8st(<8 x i8>* %p, i8 %s) nounwind { + %v = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef>, i8 %s, i32 7 + store <8 x i8> %v, <8 x i8>* %p, align 8 + ret void + +; CHECK-LABEL: v8i8st: +; CHECK: movi v[[R:[0-9]+]].8b, #1 +; CHECK: mov v[[R]].b[7], w{{[0-9]+}} +; CHECK: str d[[R]], [x{{[0-9]+}}] +} + +define void @v16i8st(<16 x i8>* %p, i8 %s) nounwind { + %v = insertelement <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 undef>, i8 %s, i32 15 + store <16 x i8> %v, <16 x i8>* %p, align 16 + ret void + +; CHECK-LABEL: v16i8st: +; CHECK: movi v[[R:[0-9]+]].16b, #128 +; CHECK: mov v[[R]].b[15], w{{[0-9]+}} +; CHECK: str q[[R]], [x{{[0-9]+}}] +} + +define void @v4i16st(<4 x i16>* %p, i16 %s) nounwind { + %v = insertelement <4 x i16> <i16 21760, i16 21760, i16 21760, i16 undef>, i16 %s, i32 3 + store <4 x i16> %v, <4 x i16>* %p, align 8 + ret void + +; CHECK-LABEL: v4i16st: +; CHECK: movi v[[R:[0-9]+]].4h, #85, lsl #8 +; CHECK: mov v[[R]].h[3], w{{[0-9]+}} +; CHECK: str d[[R]], [x{{[0-9]+}}] +} + +define void @v8i16st(<8 x i16>* %p, i16 %s) nounwind { + %v = insertelement <8 x i16> <i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 undef>, i16 %s, i32 7 + store <8 x i16> %v, <8 x i16>* %p, align 16 + ret void + +; CHECK-LABEL: v8i16st: +; CHECK: mvni v[[R:[0-9]+]].8h, #85, lsl #8 +; CHECK: mov v[[R]].h[7], w{{[0-9]+}} +; CHECK: str q[[R]], [x{{[0-9]+}}] +} + +define void @v2i32st(<2 x i32>* %p, i32 %s) nounwind { + %v = insertelement <2 x i32> <i32 983040, i32 undef>, i32 %s, i32 1 + store <2 x i32> %v, <2 x i32>* %p, align 8 + ret void + +; CHECK-LABEL: v2i32st: +; CHECK: movi v[[R:[0-9]+]].2s, #15, lsl #16 +; CHECK: mov v[[R]].s[1], w{{[0-9]+}} +; CHECK: str d[[R]], [x{{[0-9]+}}] +} + +define 
void @v4i32st(<4 x i32>* %p, i32 %s) nounwind { + %v = insertelement <4 x i32> <i32 16318463, i32 16318463, i32 16318463, i32 undef>, i32 %s, i32 3 + store <4 x i32> %v, <4 x i32>* %p, align 16 + ret void + +; CHECK-LABEL: v4i32st: +; CHECK: movi v[[R:[0-9]+]].4s, #248, msl #16 +; CHECK: mov v[[R]].s[3], w{{[0-9]+}} +; CHECK: str q[[R]], [x{{[0-9]+}}] +} + +define void @v2i64st(<2 x i64>* %p, i64 %s) nounwind { + %v = insertelement <2 x i64> <i64 13835058055282163712, i64 undef>, i64 %s, i32 1 + store <2 x i64> %v, <2 x i64>* %p, align 16 + ret void + +; CHECK-LABEL: v2i64st: +; CHECK: fmov v[[R:[0-9]+]].2d, #-2.0 +; CHECK: mov v[[R]].d[1], x{{[0-9]+}} +; CHECK: str q[[R]], [x{{[0-9]+}}] +} + +define void @v2f32st(<2 x float>* %p, float %s) nounwind { + %v = insertelement <2 x float> <float 2.0, float undef>, float %s, i32 1 + store <2 x float> %v, <2 x float>* %p, align 8 + ret void + +; CHECK-LABEL: v2f32st: +; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24 +; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0] +; CHECK: str d[[R]], [x{{[0-9]+}}] +} + +define void @v4f32st(<4 x float>* %p, float %s) nounwind { + %v = insertelement <4 x float> <float -2.0, float -2.0, float -2.0, float undef>, float %s, i32 3 + store <4 x float> %v, <4 x float>* %p, align 16 + ret void + +; CHECK-LABEL: v4f32st: +; CHECK: movi v[[R:[0-9]+]].4s, #192, lsl #24 +; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0] +; CHECK: str q[[R]], [x{{[0-9]+}}] +} + +define void @v2f64st(<2 x double>* %p, double %s) nounwind { + %v = insertelement <2 x double> <double 2.0, double undef>, double %s, i32 1 + store <2 x double> %v, <2 x double>* %p, align 16 + ret void + +; CHECK-LABEL: v2f64st: +; CHECK: fmov v[[R:[0-9]+]].2d, #2.0 +; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0] +; CHECK: str q[[R]], [x{{[0-9]+}}] +} diff --git a/test/CodeGen/AArch64/build-pair-isel.ll b/test/CodeGen/AArch64/build-pair-isel.ll new file mode 100644 index 000000000000..c9c509801738 --- /dev/null +++ b/test/CodeGen/AArch64/build-pair-isel.ll @@ 
-0,0 +1,13 @@ +; RUN: llc -mtriple=aarch64 -o - -O0 %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios5.0.0" + +; This test checks we don't fail isel due to unhandled build_pair nodes. +; CHECK: bfi +define void @compare_and_swap128() { + %1 = call i128 asm sideeffect "nop", "=r,~{memory}"() + store i128 %1, i128* undef, align 16 + ret void +} + + diff --git a/test/CodeGen/AArch64/ccmp-successor-probs.mir b/test/CodeGen/AArch64/ccmp-successor-probs.mir index 8e81c419841b..df27fc499b81 100644 --- a/test/CodeGen/AArch64/ccmp-successor-probs.mir +++ b/test/CodeGen/AArch64/ccmp-successor-probs.mir @@ -6,7 +6,7 @@ # CHECK-LABEL: name: aarch64-ccmp-successor-probs # CHECK: bb.0: # CHECK-NEXT: successors: %bb.2(0x04000000), %bb.3(0x7c000000) -# CHECK: CCMPXr %5, %4, 0, 10, implicit-def %nzcv, implicit %nzcv +# CHECK: CCMPXr %5, %4, 0, 10, implicit-def $nzcv, implicit $nzcv # name: aarch64-ccmp-successor-probs registers: @@ -22,21 +22,21 @@ body : | bb.0: successors: %bb.1(0x7e000000), %bb.2(0x02000000) - %0 = LDRXui killed %x0, 69 - %1 = COPY %xzr - %2 = SUBSXrr %1, %0, implicit-def dead %nzcv - %3 = SUBSXri %x1, 1, 0, implicit-def dead %nzcv + %0 = LDRXui killed $x0, 69 + %1 = COPY $xzr + %2 = SUBSXrr %1, %0, implicit-def dead $nzcv + %3 = SUBSXri $x1, 1, 0, implicit-def dead $nzcv %4 = COPY %0 %5 = COPY %3 - %6 = SUBSXrr %x1, killed %2, implicit-def %nzcv - Bcc 11, %bb.2, implicit %nzcv + %6 = SUBSXrr $x1, killed %2, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv B %bb.1 bb.1: successors: %bb.2(0x02082082), %bb.3(0x7df7df7e) - %7 = SUBSXrr %5, %4, implicit-def %nzcv - Bcc 12, %bb.2, implicit %nzcv + %7 = SUBSXrr %5, %4, implicit-def $nzcv + Bcc 12, %bb.2, implicit $nzcv B %bb.3 bb.2: diff --git a/test/CodeGen/AArch64/cfi_restore.mir b/test/CodeGen/AArch64/cfi_restore.mir index 92351f99cd28..6d93411d322a 100644 --- a/test/CodeGen/AArch64/cfi_restore.mir +++ b/test/CodeGen/AArch64/cfi_restore.mir @@ -9,29 +9,29 
@@ frameInfo: hasCalls: true stack: - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: 0, - callee-saved-register: '%lr' } + callee-saved-register: '$lr' } - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 8, stack-id: 0, - callee-saved-register: '%fp' } + callee-saved-register: '$fp' } body: | bb.0: - liveins: %fp, %lr + liveins: $fp, $lr - %sp = frame-setup SUBXri %sp, 16, 0 - frame-setup STRXui killed %fp, %sp, 0 :: (store 8 into %stack.1) - frame-setup CFI_INSTRUCTION offset %w29, -16 + $sp = frame-setup SUBXri $sp, 16, 0 + frame-setup STRXui killed $fp, $sp, 0 :: (store 8 into %stack.1) + frame-setup CFI_INSTRUCTION offset $w29, -16 ; CHECK: .cfi_offset w29, -16 - frame-setup STRXui killed %lr, %sp, 1 :: (store 8 into %stack.0) - frame-setup CFI_INSTRUCTION offset %w30, -8 + frame-setup STRXui killed $lr, $sp, 1 :: (store 8 into %stack.0) + frame-setup CFI_INSTRUCTION offset $w30, -8 ; CHECK: .cfi_offset w30, -8 - %fp = frame-setup ADDXri %sp, 0, 0 - frame-setup CFI_INSTRUCTION def_cfa %w29, 16 - %lr = LDRXui %sp, 1 :: (load 8 from %stack.0) - CFI_INSTRUCTION restore %w30 + $fp = frame-setup ADDXri $sp, 0, 0 + frame-setup CFI_INSTRUCTION def_cfa $w29, 16 + $lr = LDRXui $sp, 1 :: (load 8 from %stack.0) + CFI_INSTRUCTION restore $w30 ; CHECK: .cfi_restore w30 - %fp = LDRXui %sp, 0 :: (load 8 from %stack.1) - CFI_INSTRUCTION restore %w29 + $fp = LDRXui $sp, 0 :: (load 8 from %stack.1) + CFI_INSTRUCTION restore $w29 ; CHECK: .cfi_restore w29 - %sp = ADDXri %sp, 16, 0 + $sp = ADDXri $sp, 16, 0 RET_ReallyLR ; CHECK: .cfi_endproc ... 
diff --git a/test/CodeGen/AArch64/cmpwithshort.ll b/test/CodeGen/AArch64/cmpwithshort.ll index 8a94689adc94..a0475c4efcce 100644 --- a/test/CodeGen/AArch64/cmpwithshort.ll +++ b/test/CodeGen/AArch64/cmpwithshort.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -O3 -mtriple=aarch64-eabi | FileCheck %s define i16 @test_1cmp_signed_1(i16* %ptr1) { -; CHECK-LABLE: @test_1cmp_signed_1 +; CHECK-LABEL: @test_1cmp_signed_1 ; CHECK: ldrsh ; CHECK-NEXT: cmn entry: @@ -16,7 +16,7 @@ if.then: } define i16 @test_1cmp_signed_2(i16* %ptr1) { -; CHECK-LABLE: @test_1cmp_signed_2 +; CHECK-LABEL: @test_1cmp_signed_2 ; CHECK: ldrsh ; CHECK-NEXT: cmn entry: @@ -31,7 +31,7 @@ if.then: } define i16 @test_1cmp_unsigned_1(i16* %ptr1) { -; CHECK-LABLE: @test_1cmp_unsigned_1 +; CHECK-LABEL: @test_1cmp_unsigned_1 ; CHECK: ldrsh ; CHECK-NEXT: cmn entry: diff --git a/test/CodeGen/AArch64/cmpxchg-O0.ll b/test/CodeGen/AArch64/cmpxchg-O0.ll index 1bfbcf851c0e..bd3d328ec119 100644 --- a/test/CodeGen/AArch64/cmpxchg-O0.ll +++ b/test/CodeGen/AArch64/cmpxchg-O0.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll index da0f7073acef..5ff3ddfe09a4 100644 --- a/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -45,8 +45,7 @@ define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { ; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: mov [[TMP:w[0-9]+]], wzr -; CHECK: eor w0, [[TMP]], #0x1 +; CHECK: eor w0, wzr, #0x1 ; CHECK: ret %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic diff --git a/test/CodeGen/AArch64/copyprop.mir 
b/test/CodeGen/AArch64/copyprop.mir new file mode 100644 index 000000000000..e23002c56907 --- /dev/null +++ b/test/CodeGen/AArch64/copyprop.mir @@ -0,0 +1,104 @@ +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass machine-cp -o - %s | FileCheck %s +# Tests for MachineCopyPropagation copy forwarding. +--- +# Simple forwarding. +# CHECK-LABEL: name: test1 +# CHECK: $x0 = SUBXri $x0, 1, 0 +name: test1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + renamable $x1 = COPY $x0 + $x0 = SUBXri renamable $x1, 1, 0 +... +--- +# Don't forward if not renamable. +# CHECK-LABEL: name: test2 +# CHECK: $x0 = SUBXri $x1, 1, 0 +name: test2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + $x1 = COPY $x0 + $x0 = SUBXri $x1, 1, 0 +... +--- +# Don't forward reserved non-constant reg values. +# CHECK-LABEL: name: test4 +# CHECK: $x0 = SUBXri renamable $x1, 1, 0 +name: test4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + $sp = SUBXri $sp, 16, 0 + renamable $x1 = COPY $sp + $x0 = SUBXri renamable $x1, 1, 0 + $sp = ADDXri $sp, 16, 0 +... +--- +# Don't violate opcode constraints when forwarding. +# CHECK-LABEL: name: test5 +# CHECK: $x0 = SUBXri renamable $x1, 1, 0 +name: test5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + renamable $x1 = COPY $xzr + $x0 = SUBXri renamable $x1, 1, 0 +... +--- +# Test cross-class COPY forwarding. +# CHECK-LABEL: name: test6 +# CHECK: $x2 = COPY $x0 +name: test6 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + renamable $d1 = COPY $x0 + $x2 = COPY renamable $d1 + RET_ReallyLR implicit $x2 +... +--- +# Don't forward if there are overlapping implicit operands. +# CHECK-LABEL: name: test7 +# CHECK: $w0 = SUBWri killed renamable $w1, 1, 0 +name: test7 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + renamable $w1 = COPY $w0 + $w0 = SUBWri killed renamable $w1, 1, 0, implicit killed $x1 +... +--- +# Check that kill flags are cleared. 
+# CHECK-LABEL: name: test8 +# CHECK: $x2 = ADDXri $x0, 1, 0 +# CHECK: $x0 = SUBXri $x0, 1, 0 +name: test8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + renamable $x1 = COPY $x0 + $x2 = ADDXri killed $x0, 1, 0 + $x0 = SUBXri renamable $x1, 1, 0 +... +--- +# Don't forward if value is clobbered. +# CHECK-LABEL: name: test9 +# CHECK: $x2 = SUBXri renamable $x1, 1, 0 +name: test9 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + renamable $x1 = COPY $x0 + $x0 = ADDXri $x0, 1, 0 + $x2 = SUBXri renamable $x1, 1, 0 +... diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll index 5c8ac87f58b0..0e21903c2760 100644 --- a/test/CodeGen/AArch64/cpus.ll +++ b/test/CodeGen/AArch64/cpus.ll @@ -12,6 +12,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m1 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m2 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m3 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=exynos-m4 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=falkor 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=saphira 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll index a36aad51ca82..d179eab7e8fa 100644 --- a/test/CodeGen/AArch64/cxx-tlscc.ll +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -3,7 +3,7 @@ ; Shrink wrapping currently does not kick in because we have a TLS CALL ; in the entry block and it will clobber the link register. 
-; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -fast-isel | FileCheck --check-prefix=CHECK-O0 %s %struct.S = type { i8 } diff --git a/test/CodeGen/AArch64/dllexport.ll b/test/CodeGen/AArch64/dllexport.ll index 287c545610c0..f408620e26d0 100644 --- a/test/CodeGen/AArch64/dllexport.ll +++ b/test/CodeGen/AArch64/dllexport.ll @@ -40,35 +40,34 @@ define weak_odr dllexport void @l() { ; CHECK: .section .drectve ; CHECK-GNU-NOT: -export:f -; CHECK-GNU: -export:g -; CHECK-GNU-SAME: -export:h +; CHECK-GNU: .ascii " -export:g" +; CHECK-GNU: .ascii " -export:h" ; CHECK-GNU-NOT: -export:i -; CHECK-GNU-SAME: -export:j -; CHECK-GNU-SAME: -export:k -; CHECK-GNU-SAME: -export:l -; CHECK-GNU-SAME: -export:m,data -; CHECK-GNU-SAME: -export:n,data -; CHECK-GNU-SAME: -export:o,data -; CHECK-GNU-SAME: -export:p,data -; CHECK-GNU-SAME: -export:q,data -; CHECK-GNU-SAME: -export:r -; CHECK-GNU-SAME: -export:s -; CHECK-GNU-SAME: -export:t -; CHECK-GNU-SAME: -export:u +; CHECK-GNU: .ascii " -export:j" +; CHECK-GNU: .ascii " -export:k" +; CHECK-GNU: .ascii " -export:l" +; CHECK-GNU: .ascii " -export:m,data" +; CHECK-GNU: .ascii " -export:n,data" +; CHECK-GNU: .ascii " -export:o,data" +; CHECK-GNU: .ascii " -export:p,data" +; CHECK-GNU: .ascii " -export:q,data" +; CHECK-GNU: .ascii " -export:r" +; CHECK-GNU: .ascii " -export:s" +; CHECK-GNU: .ascii " -export:t" +; CHECK-GNU: .ascii " -export:u" ; CHECK-MSVC-NOT: /EXPORT:f -; CHECK-MSVC: /EXPORT:g -; CHECK-MSVC-SAME: /EXPORT:h +; CHECK-MSVC: .ascii " /EXPORT:g" +; CHECK-MSVC: .ascii " /EXPORT:h" ; CHECK-MSVC-NOT: /EXPORT:i -; CHECK-MSVC-SAME: /EXPORT:j -; CHECK-MSVC-SAME: /EXPORT:k -; CHECK-MSVC-SAME: /EXPORT:l -; CHECK-MSVC-SAME: /EXPORT:m,DATA -; CHECK-MSVC-SAME: /EXPORT:n,DATA -; CHECK-MSVC-SAME: /EXPORT:o,DATA -; CHECK-MSVC-SAME: /EXPORT:p,DATA -; CHECK-MSVC-SAME: /EXPORT:q,DATA -; CHECK-MSVC-SAME: /EXPORT:r -; CHECK-MSVC-SAME: /EXPORT:s -; 
CHECK-MSVC-SAME: /EXPORT:t -; CHECK-MSVC-SAME: /EXPORT:u - +; CHECK-MSVC: .ascii " /EXPORT:j" +; CHECK-MSVC: .ascii " /EXPORT:k" +; CHECK-MSVC: .ascii " /EXPORT:l" +; CHECK-MSVC: .ascii " /EXPORT:m,DATA" +; CHECK-MSVC: .ascii " /EXPORT:n,DATA" +; CHECK-MSVC: .ascii " /EXPORT:o,DATA" +; CHECK-MSVC: .ascii " /EXPORT:p,DATA" +; CHECK-MSVC: .ascii " /EXPORT:q,DATA" +; CHECK-MSVC: .ascii " /EXPORT:r" +; CHECK-MSVC: .ascii " /EXPORT:s" +; CHECK-MSVC: .ascii " /EXPORT:t" +; CHECK-MSVC: .ascii " /EXPORT:u" diff --git a/test/CodeGen/AArch64/dllimport.ll b/test/CodeGen/AArch64/dllimport.ll index fad049a54cd2..281c847a39a5 100644 --- a/test/CodeGen/AArch64/dllimport.ll +++ b/test/CodeGen/AArch64/dllimport.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s +; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,DAG-ISEL +; RUN: llc -mtriple aarch64-unknown-windows-msvc -fast-isel -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,FAST-ISEL +; RUN: llc -mtriple aarch64-unknown-windows-msvc -O0 -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,GLOBAL-ISEL,GLOBAL-ISEL-FALLBACK @var = external dllimport global i32 @ext = external global i32 @@ -23,7 +25,11 @@ define i32 @get_ext() { ; CHECK-LABEL: get_ext ; CHECK: adrp x8, ext -; CHECK: ldr w0, [x8, ext] +; DAG-ISEL: ldr w0, [x8, ext] +; FAST-ISEL: add x8, x8, ext +; FAST-ISEL: ldr w0, [x8] +; GLOBAL-ISEL-FALLBACK: add x8, x8, ext +; GLOBAL-ISEL-FALLBACK: ldr w0, [x8] ; CHECK: ret define i32* @get_var_pointer() { @@ -31,8 +37,8 @@ define i32* @get_var_pointer() { } ; CHECK-LABEL: get_var_pointer -; CHECK: adrp x0, __imp_var -; CHECK: ldr x0, [x0, __imp_var] +; CHECK: adrp [[REG1:x[0-9]+]], __imp_var +; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]], __imp_var] ; CHECK: ret define i32 @call_external() { @@ -51,4 +57,6 @@ define i32 @call_internal() { } ; CHECK-LABEL: call_internal -; CHECK: b internal +; DAG-ISEL: b 
internal +; FAST-ISEL: b internal +; GLOBAL-ISEL: bl internal diff --git a/test/CodeGen/AArch64/emutls.ll b/test/CodeGen/AArch64/emutls.ll index 36b0ae47bd4a..c322058a5f61 100644 --- a/test/CodeGen/AArch64/emutls.ll +++ b/test/CodeGen/AArch64/emutls.ll @@ -1,5 +1,7 @@ ; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \ ; RUN: -relocation-model=pic -disable-fp-elim < %s | FileCheck -check-prefix=ARM64 %s +; RUN: llc -mtriple=aarch64-linux-android \ +; RUN: -relocation-model=pic -disable-fp-elim < %s | FileCheck -check-prefix=ARM64 %s ; Copied from X86/emutls.ll diff --git a/test/CodeGen/AArch64/emutls_generic.ll b/test/CodeGen/AArch64/emutls_generic.ll index f205078ed411..840833972881 100644 --- a/test/CodeGen/AArch64/emutls_generic.ll +++ b/test/CodeGen/AArch64/emutls_generic.ll @@ -9,6 +9,18 @@ ; RUN: llc < %s -emulated-tls -mtriple=aarch64-apple-darwin -O3 \ ; RUN: | FileCheck -check-prefix=DARWIN %s +; RUN: llc < %s -mtriple=aarch64-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -mtriple=aarch64-linux-android -relocation-model=pic -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -mtriple=aarch64-linux-android -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; aarch64-windows-gnu needs explicit -emulated-tls +; RUN: llc < %s -mtriple=aarch64-apple-darwin -O3 \ +; RUN: | FileCheck -check-prefix=NoEMU %s + +; NoEMU-NOT: __emutls + ; Make sure that TLS symbols are emitted in expected order. 
@external_x = external thread_local global i32, align 8 diff --git a/test/CodeGen/AArch64/expand-select.ll b/test/CodeGen/AArch64/expand-select.ll new file mode 100644 index 000000000000..da1e8dcbafed --- /dev/null +++ b/test/CodeGen/AArch64/expand-select.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O3 %s -o - | FileCheck %s + +define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, <2 x i128> *%Out) { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: +; CHECK-NEXT: and w9, w0, #0x1 +; CHECK-NEXT: fmov s0, wzr +; CHECK-NEXT: ldp x10, x8, [sp, #8] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldr x9, [sp] +; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s +; CHECK-NEXT: fmov w11, s0 +; CHECK-NEXT: tst w11, #0x1 +; CHECK-NEXT: csel x11, x2, x6, ne +; CHECK-NEXT: csel x12, x3, x7, ne +; CHECK-NEXT: csel x9, x4, x9, ne +; CHECK-NEXT: csel x10, x5, x10, ne +; CHECK-NEXT: stp x9, x10, [x8, #16] +; CHECK-NEXT: stp x11, x12, [x8] +; CHECK-NEXT: ret + %cond = and i32 %In1, 1 + %cbool = icmp eq i32 %cond, 0 + %res = select i1 %cbool, <2 x i128> %In2, <2 x i128> %In3 + store <2 x i128> %res, <2 x i128> *%Out + + ret void +} + +; Check case when scalar size is not power of 2. 
+define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, <2 x i96> *%Out) { +; CHECK-LABEL: bar: +; CHECK: // %bb.0: +; CHECK-NEXT: and w10, w0, #0x1 +; CHECK-NEXT: fmov s0, wzr +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ldp x11, x8, [sp, #8] +; CHECK-NEXT: ldr x9, [sp] +; CHECK-NEXT: dup v1.4s, v0.s[0] +; CHECK-NEXT: mov x10, v1.d[1] +; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: tst w10, #0x1 +; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: csel x11, x5, x11, ne +; CHECK-NEXT: csel x9, x4, x9, ne +; CHECK-NEXT: tst w10, #0x1 +; CHECK-NEXT: csel x10, x3, x7, ne +; CHECK-NEXT: csel x12, x2, x6, ne +; CHECK-NEXT: stur x9, [x8, #12] +; CHECK-NEXT: str x12, [x8] +; CHECK-NEXT: str w10, [x8, #8] +; CHECK-NEXT: str w11, [x8, #20] +; CHECK-NEXT: ret + %cond = and i32 %In1, 1 + %cbool = icmp eq i32 %cond, 0 + %res = select i1 %cbool, <2 x i96> %In2, <2 x i96> %In3 + store <2 x i96> %res, <2 x i96> *%Out + + ret void +} diff --git a/test/CodeGen/AArch64/extract-lowbits.ll b/test/CodeGen/AArch64/extract-lowbits.ll new file mode 100644 index 000000000000..e669a5d9cf74 --- /dev/null +++ b/test/CodeGen/AArch64/extract-lowbits.ll @@ -0,0 +1,761 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; *Please* keep in sync with test/CodeGen/X86/extract-lowbits.ll + +; https://bugs.llvm.org/show_bug.cgi?id=36419 +; https://bugs.llvm.org/show_bug.cgi?id=37603 +; https://bugs.llvm.org/show_bug.cgi?id=37610 + +; Patterns: +; a) x & (1 << nbits) - 1 +; b) x & ~(-1 << nbits) +; c) x & (-1 >> (32 - y)) +; d) x << (32 - y) >> (32 - y) +; are equivalent. + +; ---------------------------------------------------------------------------- ; +; Pattern a. 
32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_a0: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_a1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_a2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: sub w9, w9, #1 // =1 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_a3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: sub w9, w9, #1 // =1 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_a4_commutative: +; CHECK: // %bb.0: +; 
CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_a0: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_a1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_a2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_a3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, 
i64* %w + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_a4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: and x0, x0, x8 +; CHECK-NEXT: ret + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Pattern b. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_b0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_b1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_b2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { +; 
CHECK-LABEL: bzhi32_b3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_b4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_b0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic x0, x0, x8 +; CHECK-NEXT: ret + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_b1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic x0, x0, x8 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_b2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: bic x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 
zeroext %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_b3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: bic x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_b4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic x0, x0, x8 +; CHECK-NEXT: ret + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Pattern c. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_c0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_c1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x20 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_c2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr 
w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_c3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x20 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_c4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_c0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_c1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x40 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits 
to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_c2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: mov x10, #-1 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_c3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x40 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: mov x10, #-1 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_c4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and x0, x0, x8 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Pattern d. 32-bit. 
+; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_d0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_d1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x20 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ret + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_d2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi32_d3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x20 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i8 32, %numlowbits + %sh_prom = zext i8 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +; 64-bit. 
+ +define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_d0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_d1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x40 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ret + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_d2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { +; CHECK-LABEL: bzhi64_d3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x40 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i8 64, %numlowbits + %sh_prom = zext i8 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +; ---------------------------------------------------------------------------- ; +; Constant mask +; ---------------------------------------------------------------------------- ; + 
+; 32-bit + +define i32 @bzhi32_constant_mask32(i32 %val) nounwind { +; CHECK-LABEL: bzhi32_constant_mask32: +; CHECK: // %bb.0: +; CHECK-NEXT: and w0, w0, #0x7fffffff +; CHECK-NEXT: ret + %masked = and i32 %val, 2147483647 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask32_load(i32* %val) nounwind { +; CHECK-LABEL: bzhi32_constant_mask32_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and w0, w8, #0x7fffffff +; CHECK-NEXT: ret + %val1 = load i32, i32* %val + %masked = and i32 %val1, 2147483647 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask16(i32 %val) nounwind { +; CHECK-LABEL: bzhi32_constant_mask16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w0, w0, #0x7fff +; CHECK-NEXT: ret + %masked = and i32 %val, 32767 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask16_load(i32* %val) nounwind { +; CHECK-LABEL: bzhi32_constant_mask16_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and w0, w8, #0x7fff +; CHECK-NEXT: ret + %val1 = load i32, i32* %val + %masked = and i32 %val1, 32767 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask8(i32 %val) nounwind { +; CHECK-LABEL: bzhi32_constant_mask8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w0, w0, #0x7f +; CHECK-NEXT: ret + %masked = and i32 %val, 127 + ret i32 %masked +} + +define i32 @bzhi32_constant_mask8_load(i32* %val) nounwind { +; CHECK-LABEL: bzhi32_constant_mask8_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and w0, w8, #0x7f +; CHECK-NEXT: ret + %val1 = load i32, i32* %val + %masked = and i32 %val1, 127 + ret i32 %masked +} + +; 64-bit + +define i64 @bzhi64_constant_mask64(i64 %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask64: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x3fffffffffffffff +; CHECK-NEXT: ret + %masked = and i64 %val, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask64_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, 
[x0] +; CHECK-NEXT: and x0, x8, #0x3fffffffffffffff +; CHECK-NEXT: ret + %val1 = load i64, i64* %val + %masked = and i64 %val1, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask32(i64 %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask32: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x7fffffff +; CHECK-NEXT: ret + %masked = and i64 %val, 2147483647 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask32_load(i64* %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask32_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: and x0, x8, #0x7fffffff +; CHECK-NEXT: ret + %val1 = load i64, i64* %val + %masked = and i64 %val1, 2147483647 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask16(i64 %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask16: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x7fff +; CHECK-NEXT: ret + %masked = and i64 %val, 32767 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask16_load(i64* %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask16_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: and x0, x8, #0x7fff +; CHECK-NEXT: ret + %val1 = load i64, i64* %val + %masked = and i64 %val1, 32767 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask8(i64 %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask8: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x7f +; CHECK-NEXT: ret + %masked = and i64 %val, 127 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask8_load(i64* %val) nounwind { +; CHECK-LABEL: bzhi64_constant_mask8_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: and x0, x8, #0x7f +; CHECK-NEXT: ret + %val1 = load i64, i64* %val + %masked = and i64 %val1, 127 + ret i64 %masked +} diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll index 1bec17f78adb..c6c279d7d213 100644 --- a/test/CodeGen/AArch64/f16-instructions.ll +++ b/test/CodeGen/AArch64/f16-instructions.ll @@ -489,7 +489,7 @@ else: ; 
CHECK-COMMON-LABEL: test_phi: ; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0 -; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x[[PTR]]] +; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0] ; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]: ; CHECK-COMMON: mov.16b v[[R:[0-9]+]], v[[AB]] ; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]] @@ -736,6 +736,9 @@ declare half @llvm.rint.f16(half %a) #0 declare half @llvm.nearbyint.f16(half %a) #0 declare half @llvm.round.f16(half %a) #0 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 +declare half @llvm.aarch64.neon.frecpe.f16(half %a) #0 +declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0 +declare half @llvm.aarch64.neon.frsqrte.f16(half %a) #0 ; CHECK-CVT-LABEL: test_sqrt: ; CHECK-CVT-NEXT: fcvt s0, h0 @@ -1124,4 +1127,31 @@ define half @test_fmuladd(half %a, half %b, half %c) #0 { ret half %r } +; CHECK-FP16-LABEL: test_vrecpeh_f16: +; CHECK-FP16-NEXT: frecpe h0, h0 +; CHECK-FP16-NEXT: ret + +define half @test_vrecpeh_f16(half %a) #0 { + %r = call half @llvm.aarch64.neon.frecpe.f16(half %a) + ret half %r +} + +; CHECK-FP16-LABEL: test_vrecpxh_f16: +; CHECK-FP16-NEXT: frecpx h0, h0 +; CHECK-FP16-NEXT: ret + +define half @test_vrecpxh_f16(half %a) #0 { + %r = call half @llvm.aarch64.neon.frecpx.f16(half %a) + ret half %r +} + +; CHECK-FP16-LABEL: test_vrsqrteh_f16: +; CHECK-FP16-NEXT: frsqrte h0, h0 +; CHECK-FP16-NEXT: ret + +define half @test_vrsqrteh_f16(half %a) #0 { + %r = call half @llvm.aarch64.neon.frsqrte.f16(half %a) + ret half %r +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/fabs.ll b/test/CodeGen/AArch64/fabs.ll new file mode 100644 index 000000000000..86610363cef2 --- /dev/null +++ b/test/CodeGen/AArch64/fabs.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +; Test against PR36600: https://bugs.llvm.org/show_bug.cgi?id=36600 +; This is not fabs. If X = -0.0, it should return -0.0 not 0.0. 
+ +define double @not_fabs(double %x) #0 { +; CHECK-LABEL: not_fabs: +; CHECK: // %bb.0: +; CHECK-NEXT: fneg d1, d0 +; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: fcsel d0, d1, d0, le +; CHECK-NEXT: ret + %cmp = fcmp nnan ole double %x, 0.0 + %sub = fsub nnan double -0.0, %x + %cond = select i1 %cmp, double %sub, double %x + ret double %cond +} + +; Try again with different type, predicate, and compare constant. + +define float @still_not_fabs(float %x) #0 { +; CHECK-LABEL: still_not_fabs: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: fneg s2, s0 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: fcsel s0, s0, s2, ge +; CHECK-NEXT: ret + %cmp = fcmp nnan oge float %x, -0.0 + %sub = fsub nnan float -0.0, %x + %cond = select i1 %cmp, float %x, float %sub + ret float %cond +} + +attributes #0 = { "no-nans-fp-math"="true" } + diff --git a/test/CodeGen/AArch64/fadd-combines.ll b/test/CodeGen/AArch64/fadd-combines.ll index c106f293ccff..be027a7b558b 100644 --- a/test/CodeGen/AArch64/fadd-combines.ll +++ b/test/CodeGen/AArch64/fadd-combines.ll @@ -1,63 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s +define double @test1(double %a, double %b) { ; CHECK-LABEL: test1: -; CHECK: fadd d1, d1, d1 -; CHECK: fsub d0, d0, d1 -define double @test1(double %a, double %b) local_unnamed_addr #0 { -entry: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd d1, d1, d1 +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: ret %mul = fmul double %b, -2.000000e+00 %add1 = fadd double %a, %mul ret double %add1 } ; DAGCombine will canonicalize 'a - 2.0*b' to 'a + -2.0*b' + +define double @test2(double %a, double %b) { ; CHECK-LABEL: test2: -; CHECK: fadd d1, d1, d1 -; CHECK: fsub d0, d0, d1 -define double @test2(double %a, double %b) local_unnamed_addr #0 { -entry: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd d1, d1, d1 +; CHECK-NEXT: fsub 
d0, d0, d1 +; CHECK-NEXT: ret %mul = fmul double %b, 2.000000e+00 %add1 = fsub double %a, %mul ret double %add1 } +define double @test3(double %a, double %b, double %c) { ; CHECK-LABEL: test3: -; CHECK: fmul d0, d0, d1 -; CHECK: fadd d1, d2, d2 -; CHECK: fsub d0, d0, d1 -define double @test3(double %a, double %b, double %c) local_unnamed_addr #0 { -entry: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: fadd d1, d2, d2 +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: ret %mul = fmul double %a, %b %mul1 = fmul double %c, 2.000000e+00 %sub = fsub double %mul, %mul1 ret double %sub } +define double @test4(double %a, double %b, double %c) { ; CHECK-LABEL: test4: -; CHECK: fmul d0, d0, d1 -; CHECK: fadd d1, d2, d2 -; CHECK: fsub d0, d0, d1 -define double @test4(double %a, double %b, double %c) local_unnamed_addr #0 { -entry: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: fadd d1, d2, d2 +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: ret %mul = fmul double %a, %b %mul1 = fmul double %c, -2.000000e+00 %add2 = fadd double %mul, %mul1 ret double %add2 } -; CHECK-LABEL: test5: -; CHECK: fadd v1.4s, v1.4s, v1.4s -; CHECK: fsub v0.4s, v0.4s, v1.4s define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test5: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v1.4s, v1.4s, v1.4s +; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret %mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0> %add = fadd <4 x float> %a, %mul ret <4 x float> %add } -; CHECK-LABEL: test6: -; CHECK: fadd v1.4s, v1.4s, v1.4s -; CHECK: fsub v0.4s, v0.4s, v1.4s define <4 x float> @test6(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test6: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v1.4s, v1.4s, v1.4s +; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret %mul = fmul <4 x float> %b, <float 2.0, float 2.0, float 2.0, float 2.0> %add = fsub <4 x float> %a, %mul ret <4 x float> %add @@ -65,14 +75,66 @@ define <4 x float> @test6(<4 x 
float> %a, <4 x float> %b) { ; Don't fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) if the fmul has ; multiple uses. + +define double @test7(double %a, double %b) nounwind { ; CHECK-LABEL: test7: -; CHECK: fmul -define double @test7(double %a, double %b) local_unnamed_addr #0 { -entry: +; CHECK: // %bb.0: +; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fmov d2, #-2.00000000 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fadd d8, d0, d1 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %mul = fmul double %b, -2.000000e+00 %add1 = fadd double %a, %mul call void @use(double %mul) ret double %add1 } +define float @fadd_const_multiuse_fmf(float %x) { +; CHECK-LABEL: fadd_const_multiuse_fmf: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI7_0 +; CHECK-NEXT: adrp x9, .LCPI7_1 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI7_1] +; CHECK-NEXT: fadd s1, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %a1 = fadd float %x, 42.0 + %a2 = fadd nsz reassoc float %a1, 17.0 + %a3 = fadd float %a1, %a2 + ret float %a3 +} + +; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0). 
+; The machine combiner transforms this into a chain of 3 dependent adds: +; ((x + 59.0) + 17.0) + x + +define float @fadd_const_multiuse_attr(float %x) #0 { +; CHECK-LABEL: fadd_const_multiuse_attr: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, .LCPI8_1 +; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI8_1] +; CHECK-NEXT: ldr s2, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: fadd s1, s0, s1 +; CHECK-NEXT: fadd s1, s2, s1 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret + %a1 = fadd float %x, 42.0 + %a2 = fadd float %a1, 17.0 + %a3 = fadd float %a1, %a2 + ret float %a3 +} + +attributes #0 = { "unsafe-fp-math"="true" } + declare void @use(double) + diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir index 38622ae0e49a..e37d8be34948 100644 --- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir +++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir @@ -3,147 +3,147 @@ # Verify that the tag collision between the loads is resolved for various load opcodes. # CHECK-LABEL: name: hwpf1 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LDRWui %[[BASE]], 0 -# CHECK: LDRWui %x1, 1 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LDRWui $[[BASE]], 0 +# CHECK: LDRWui $x1, 1 name: hwpf1 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1 + liveins: $w0, $x1 - %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 1 + $w2 = LDRWui $x1, 0 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 1 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... 
--- # CHECK-LABEL: name: hwpf2 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1i64 %q2, 0, %[[BASE]] -# CHECK: LDRWui %x1, 0 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1i64 $q2, 0, $[[BASE]] +# CHECK: LDRWui $x1, 0 name: hwpf2 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %q2 + liveins: $w0, $x1, $q2 - %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 0 + $q2 = LD1i64 $q2, 0, $x1 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 0 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... --- # CHECK-LABEL: name: hwpf3 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1i8 %q2, 0, %[[BASE]] -# CHECK: LDRWui %x1, 0 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1i8 $q2, 0, $[[BASE]] +# CHECK: LDRWui $x1, 0 name: hwpf3 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %q2 + liveins: $w0, $x1, $q2 - %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4) - %w0 = LDRWui %x1, 0 + $q2 = LD1i8 $q2, 0, $x1 :: ("aarch64-strided-access" load 4) + $w0 = LDRWui $x1, 0 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... 
--- # CHECK-LABEL: name: hwpf4 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1Onev1d %[[BASE]] -# CHECK: LDRWui %x1, 0 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1Onev1d $[[BASE]] +# CHECK: LDRWui $x1, 0 name: hwpf4 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1 + liveins: $w0, $x1 - %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 0 + $d2 = LD1Onev1d $x1 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 0 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... --- # CHECK-LABEL: name: hwpf5 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1Twov1d %[[BASE]] -# CHECK: LDRWui %x1, 0 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1Twov1d $[[BASE]] +# CHECK: LDRWui $x1, 0 name: hwpf5 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1 + liveins: $w0, $x1 - %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4) - %w0 = LDRWui %x1, 0 + $d2_d3 = LD1Twov1d $x1 :: ("aarch64-strided-access" load 4) + $w0 = LDRWui $x1, 0 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... 
--- # CHECK-LABEL: name: hwpf6 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LDPQi %[[BASE]] -# CHECK: LDRWui %x1, 3 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LDPQi $[[BASE]] +# CHECK: LDRWui $x1, 3 name: hwpf6 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1 + liveins: $w0, $x1 - %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4) - %w0 = LDRWui %x1, 3 + $q2, $q3 = LDPQi $x1, 3 :: ("aarch64-strided-access" load 4) + $w0 = LDRWui $x1, 3 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... --- # CHECK-LABEL: name: hwpf7 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LDPXi %[[BASE]] -# CHECK: LDRWui %x1, 2 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LDPXi $[[BASE]] +# CHECK: LDRWui $x1, 2 name: hwpf7 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1 + liveins: $w0, $x1 - %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 2 + $x2, $x3 = LDPXi $x1, 3 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 2 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR @@ -153,154 +153,154 @@ body: | # for post increment addressing for various load opcodes. 
# CHECK-LABEL: name: hwpfinc1 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LDRWpost %[[BASE]], 0 -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWui %x1, 1 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LDRWpost $[[BASE]], 0 +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWui $x1, 1 name: hwpfinc1 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1 + liveins: $w0, $x1 - %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 1 + $x1, $w2 = LDRWpost $x1, 0 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 1 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... --- # CHECK-LABEL: name: hwpfinc2 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1i64_POST %q2, 0, %[[BASE]] -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWui %x1, 1 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1i64_POST $q2, 0, $[[BASE]] +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWui $x1, 1 name: hwpfinc2 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %q2 + liveins: $w0, $x1, $q2 - %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 132 + $x1, $q2 = LD1i64_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 132 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... 
--- # CHECK-LABEL: name: hwpfinc3 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1i8_POST %q2, 0, %[[BASE]] -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWui %x1, 132 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1i8_POST $q2, 0, $[[BASE]] +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWui $x1, 132 name: hwpfinc3 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %q2 + liveins: $w0, $x1, $q2 - %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4) - %w0 = LDRWui %x1, 132 + $x1, $q2 = LD1i8_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load 4) + $w0 = LDRWui $x1, 132 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... --- # CHECK-LABEL: name: hwpfinc4 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD1Rv1d_POST %[[BASE]] -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWui %x1, 252 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD1Rv1d_POST $[[BASE]] +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWui $x1, 252 name: hwpfinc4 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %q2 + liveins: $w0, $x1, $q2 - %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4) - %w2 = LDRWui %x1, 252 + $x1, $d2 = LD1Rv1d_POST $x1, $xzr :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 252 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... 
--- # CHECK-LABEL: name: hwpfinc5 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LD3Threev2s_POST %[[BASE]] -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWroX %x17, %x0 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LD3Threev2s_POST $[[BASE]] +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWroX $x17, $x0 name: hwpfinc5 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %x17, %q2 + liveins: $w0, $x1, $x17, $q2 - %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4) - %w0 = LDRWroX %x17, %x0, 0, 0 + $x1, $d2_d3_d4 = LD3Threev2s_POST $x1, $x0 :: ("aarch64-strided-access" load 4) + $w0 = LDRWroX $x17, $x0, 0, 0 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... --- # CHECK-LABEL: name: hwpfinc6 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LDPDpost %[[BASE]] -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWui %x17, 2 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LDPDpost $[[BASE]] +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWui $x17, 2 name: hwpfinc6 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %x17, %q2 + liveins: $w0, $x1, $x17, $q2 - %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4) - %w16 = LDRWui %x17, 2 + $x1, $d2, $d3 = LDPDpost $x1, 3 :: ("aarch64-strided-access" load 4) + $w16 = LDRWui $x17, 2 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR ... 
--- # CHECK-LABEL: name: hwpfinc7 -# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 -# CHECK: LDPXpost %[[BASE]] -# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 -# CHECK: LDRWui %x17, 2 +# CHECK: $[[BASE:[a-z0-9]+]] = ORRXrs $xzr, $x1, 0 +# CHECK: LDPXpost $[[BASE]] +# CHECK: $x1 = ORRXrs $xzr, $[[BASE]], 0 +# CHECK: LDRWui $x17, 2 name: hwpfinc7 tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %x17, %q2 + liveins: $w0, $x1, $x17, $q2 - %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4) - %w18 = LDRWui %x17, 2 + $x1, $x2, $x3 = LDPXpost $x1, 3 :: ("aarch64-strided-access" load 4) + $w18 = LDRWui $x17, 2 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR @@ -309,23 +309,23 @@ body: | # Check that we handle case of strided load with no HW prefetcher tag correctly. # CHECK-LABEL: name: hwpf_notagbug -# CHECK-NOT: ORRXrs %xzr -# CHECK: LDARW %x1 -# CHECK-NOT: ORRXrs %xzr -# CHECK: LDRWui %x1 +# CHECK-NOT: ORRXrs $xzr +# CHECK: LDARW $x1 +# CHECK-NOT: ORRXrs $xzr +# CHECK: LDRWui $x1 name: hwpf_notagbug tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x1, %x17 + liveins: $w0, $x1, $x17 - %w1 = LDARW %x1 :: ("aarch64-strided-access" load 4) - %w1 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4) - %w17 = LDRWui %x17, 0 :: ("aarch64-strided-access" load 4) + $w1 = LDARW $x1 :: ("aarch64-strided-access" load 4) + $w1 = LDRWui $x1, 0 :: ("aarch64-strided-access" load 4) + $w17 = LDRWui $x17, 0 :: ("aarch64-strided-access" load 4) - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR @@ -334,21 +334,46 @@ body: | # Check that we treat sp based loads as non-prefetching. 
# CHECK-LABEL: name: hwpf_spbase -# CHECK-NOT: ORRXrs %xzr -# CHECK: LDRWui %x15 -# CHECK: LDRWui %sp +# CHECK-NOT: ORRXrs $xzr +# CHECK: LDRWui $x15 +# CHECK: LDRWui $sp name: hwpf_spbase tracksRegLiveness: true body: | bb.0: - liveins: %w0, %x15 + liveins: $w0, $x15 - %w1 = LDRWui %x15, 0 :: ("aarch64-strided-access" load 4) - %w17 = LDRWui %sp, 0 + $w1 = LDRWui $x15, 0 :: ("aarch64-strided-access" load 4) + $w17 = LDRWui $sp, 0 - %w0 = SUBWri %w0, 1, 0 - %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv - Bcc 9, %bb.0, implicit %nzcv + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv + + bb.1: + RET_ReallyLR +... +--- +# Check that non-base registers are considered live when finding a +# scratch register by making sure we don't use $x2 for the scratch +# register for the inserted ORRXrs. +# CHECK-LABEL: name: hwpf_offreg +# CHECK: $x3 = ORRXrs $xzr, $x1, 0 +# CHECK: $w10 = LDRWroX $x3, $x2, 0, 0 +name: hwpf_offreg +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $x1, $x2, $x17, $x18 + + $w10 = LDRWroX $x1, $x2, 0, 0 :: ("aarch64-strided-access" load 4) + + $x2 = ORRXrs $xzr, $x10, 0 + $w26 = LDRWroX $x1, $x2, 0, 0 + + $w0 = SUBWri $w0, 1, 0 + $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv + Bcc 9, %bb.0, implicit $nzcv bb.1: RET_ReallyLR diff --git a/test/CodeGen/AArch64/fast-isel-atomic.ll b/test/CodeGen/AArch64/fast-isel-atomic.ll index ec612616ae2a..452129e49515 100644 --- a/test/CodeGen/AArch64/fast-isel-atomic.ll +++ b/test/CodeGen/AArch64/fast-isel-atomic.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-- -O0 -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false -verify-machineinstrs < %s | FileCheck %s ; Note that checking SelectionDAG output isn't strictly necessary, but they ; currently match, so we might as well check both! 
Feel free to remove SDAG. diff --git a/test/CodeGen/AArch64/fast-isel-gep.ll b/test/CodeGen/AArch64/fast-isel-gep.ll index 0cb1fd8465d4..665476969ecb 100644 --- a/test/CodeGen/AArch64/fast-isel-gep.ll +++ b/test/CodeGen/AArch64/fast-isel-gep.ll @@ -34,7 +34,7 @@ define i32* @test_array3(i32* %a) { define i32* @test_array4(i32* %a) { ; CHECK-LABEL: test_array4 ; CHECK: mov [[REG:x[0-9]+]], #4104 -; CHECK-NEXR: add x0, x0, [[REG]] +; CHECK-NEXT: add x0, x0, [[REG]] %1 = getelementptr inbounds i32, i32* %a, i64 1026 ret i32* %1 } diff --git a/test/CodeGen/AArch64/fast-isel-memcpy.ll b/test/CodeGen/AArch64/fast-isel-memcpy.ll index 07595a954db0..290e0c918ade 100644 --- a/test/CodeGen/AArch64/fast-isel-memcpy.ll +++ b/test/CodeGen/AArch64/fast-isel-memcpy.ll @@ -8,8 +8,8 @@ define void @test(i64 %a, i8* %b) { %1 = and i64 %a, 9223372036854775807 %2 = inttoptr i64 %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %b, i64 8, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/test/CodeGen/AArch64/fast-isel-sdiv.ll b/test/CodeGen/AArch64/fast-isel-sdiv.ll index 3c8de43af6c1..2fd0ec02f968 100644 --- a/test/CodeGen/AArch64/fast-isel-sdiv.ll +++ b/test/CodeGen/AArch64/fast-isel-sdiv.ll @@ -1,56 +1,69 @@ -; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ISEL +; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FAST define i32 @sdiv_i32_exact(i32 %a) { -; 
CHECK-LABEL: sdiv_i32_exact -; CHECK: asr {{w[0-9]+}}, w0, #3 +; CHECK-LABEL: sdiv_i32_exact: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w0, w0, #3 +; CHECK-NEXT: ret %1 = sdiv exact i32 %a, 8 ret i32 %1 } define i32 @sdiv_i32_pos(i32 %a) { -; CHECK-LABEL: sdiv_i32_pos -; CHECK: add [[REG1:w[0-9]+]], w0, #7 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt -; CHECK-NEXT: asr {{w[0-9]+}}, [[REG2]], #3 +; CHECK-LABEL: sdiv_i32_pos: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #7 // =7 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr w0, w8, #3 +; CHECK-NEXT: ret %1 = sdiv i32 %a, 8 ret i32 %1 } define i32 @sdiv_i32_neg(i32 %a) { -; CHECK-LABEL: sdiv_i32_neg -; CHECK: add [[REG1:w[0-9]+]], w0, #7 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt -; CHECK-NEXT: neg {{w[0-9]+}}, [[REG2]], asr #3 +; CHECK-LABEL: sdiv_i32_neg: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #7 // =7 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: neg w0, w8, asr #3 +; CHECK-NEXT: ret %1 = sdiv i32 %a, -8 ret i32 %1 } define i64 @sdiv_i64_exact(i64 %a) { -; CHECK-LABEL: sdiv_i64_exact -; CHECK: asr {{x[0-9]+}}, x0, #4 +; CHECK-LABEL: sdiv_i64_exact: +; CHECK: // %bb.0: +; CHECK-NEXT: asr x0, x0, #4 +; CHECK-NEXT: ret %1 = sdiv exact i64 %a, 16 ret i64 %1 } define i64 @sdiv_i64_pos(i64 %a) { -; CHECK-LABEL: sdiv_i64_pos -; CHECK: add [[REG1:x[0-9]+]], x0, #15 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt -; CHECK-NEXT: asr {{x[0-9]+}}, [[REG2]], #4 +; CHECK-LABEL: sdiv_i64_pos: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #15 // =15 +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: csel x8, x8, x0, lt +; CHECK-NEXT: asr x0, x8, #4 +; CHECK-NEXT: ret %1 = sdiv i64 %a, 16 ret i64 %1 } define i64 @sdiv_i64_neg(i64 %a) { -; CHECK-LABEL: sdiv_i64_neg -; CHECK: add [[REG1:x[0-9]+]], x0, #15 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csel 
[[REG2:x[0-9]+]], [[REG1]], x0, lt -; CHECK-NEXT: neg {{x[0-9]+}}, [[REG2]], asr #4 +; CHECK-LABEL: sdiv_i64_neg: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #15 // =15 +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: csel x8, x8, x0, lt +; CHECK-NEXT: neg x0, x8, asr #4 +; CHECK-NEXT: ret %1 = sdiv i64 %a, -16 ret i64 %1 } diff --git a/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/test/CodeGen/AArch64/fast-isel-sp-adjust.ll index 9201d1be6a9c..a17a2564b4fe 100644 --- a/test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ b/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=aarch64-apple-ios -o - %s | FileCheck %s -; RUN: not llc -O0 -mtriple=aarch64-apple-ios -o /dev/null -fast-isel-abort=3 %s 2> %t +; RUN: llc -O0 -fast-isel -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: not llc -O0 -mtriple=aarch64-apple-ios -o /dev/null -fast-isel -fast-isel-abort=3 %s 2> %t ; RUN: FileCheck %s --check-prefix=CHECK-ERRORS < %t ; The issue here is that FastISel cannot emit an ADDrr where one of the inputs diff --git a/test/CodeGen/AArch64/fast-regalloc-empty-bb-with-liveins.mir b/test/CodeGen/AArch64/fast-regalloc-empty-bb-with-liveins.mir new file mode 100644 index 000000000000..f620cd22901c --- /dev/null +++ b/test/CodeGen/AArch64/fast-regalloc-empty-bb-with-liveins.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-ios -run-pass regallocfast -o - %s | FileCheck %s +# This test used to crash the fast register alloc. +# Basically, when a basic block has liveins, the fast regalloc +# was deferencing the begin iterator of this block. However, +# when this block is empty and it will just crashed! 
+--- +name: crashing +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: crashing + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $x0, $x1 + ; CHECK: bb.1: + ; CHECK: renamable $w0 = MOVi32imm -1 + ; CHECK: RET_ReallyLR implicit killed $w0 + bb.1: + liveins: $x0, $x1 + + bb.2: + %0:gpr32 = MOVi32imm -1 + $w0 = COPY %0 + RET_ReallyLR implicit $w0 + +... diff --git a/test/CodeGen/AArch64/fcvt_combine.ll b/test/CodeGen/AArch64/fcvt_combine.ll index 5644fa28533b..294eb89de067 100644 --- a/test/CodeGen/AArch64/fcvt_combine.ll +++ b/test/CodeGen/AArch64/fcvt_combine.ll @@ -100,9 +100,8 @@ define <2 x i32> @test9(<2 x float> %f) { ret <2 x i32> %vcvt.i } -; Don't combine all undefs. +; Combine all undefs. ; CHECK-LABEL: test10 -; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}} ; CHECK: ret define <2 x i32> @test10(<2 x float> %f) { diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index 0827fb8c9e8c..a13f7e1e34ac 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -17,6 +17,9 @@ define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) { %val = zext i1 %test to i32 ; CHECK: cset {{[xw][0-9]+}}, ne +; CHECK: mov [[RHSCOPY:w[0-9]+]], [[RHS]] +; CHECK: mov [[LHSCOPY:w[0-9]+]], [[LHS]] + store i32 %val, i32* @var call void @bar() @@ -25,7 +28,7 @@ define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) { ; Currently, the comparison is emitted again. An MSR/MRS pair would also be ; acceptable, but assuming the call preserves NZCV is not. 
br i1 %test, label %iftrue, label %iffalse -; CHECK: cmp [[LHS]], [[RHS]] +; CHECK: cmp [[LHSCOPY]], [[RHSCOPY]] ; CHECK: b.eq iftrue: diff --git a/test/CodeGen/AArch64/fold-global-offsets.ll b/test/CodeGen/AArch64/fold-global-offsets.ll new file mode 100644 index 000000000000..ffcdc2bee5ff --- /dev/null +++ b/test/CodeGen/AArch64/fold-global-offsets.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s + +@x1 = external hidden global [2 x i64] +@x2 = external hidden global [16777216 x i64] +@x3 = external hidden global { [9 x i8*], [8 x i8*] } + +define i64 @f1() { + ; CHECK: f1: + ; CHECK: adrp x8, x1+16 + ; CHECK: ldr x0, [x8, :lo12:x1+16] + %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2) + ret i64 %l +} + +define i64 @f2() { + ; CHECK: f2: + ; CHECK: adrp x8, x1 + ; CHECK: add x8, x8, :lo12:x1 + ; CHECK: ldr x0, [x8, #24] + %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3) + ret i64 %l +} + +define i64 @f3() { + ; CHECK: f3: + ; CHECK: adrp x8, x1+1 + ; CHECK: add x8, x8, :lo12:x1+1 + ; CHECK: ldr x0, [x8] + %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*) + ret i64 %l +} + +define [2 x i64] @f4() { + ; CHECK: f4: + ; CHECK: adrp x8, x2+8 + ; CHECK: add x8, x8, :lo12:x2+8 + ; CHECK: ldp x0, x1, [x8] + %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*) + ret [2 x i64] %l +} + +define i64 @f5() { + ; CHECK: f5: + ; CHECK: adrp x8, x2+2097144 + ; CHECK: ldr x0, [x8, :lo12:x2+2097144] + ; CHECK: ret + %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143) + ret i64 %l +} + +define i64 @f6() { + ; CHECK: f6: + ; CHECK: adrp x8, x2 + ; CHECK: add x8, x8, :lo12:x2 + ; CHECK: orr w9, wzr, #0x200000 + ; CHECK: ldr x0, [x8, x9] + ; CHECK: ret + %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 
0, i64 262144) + ret i64 %l +} + +define i32 @f7() { +entry: + ; CHECK: f7 + ; CHECK: adrp x8, x3+108 + ; CHECK: ldr w0, [x8, :lo12:x3+108] + %l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5) + ret i32 %l +} diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll index fbdd8f984e8c..32881e6522be 100644 --- a/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -295,18 +295,12 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_une: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, ne -; CHECK-CVT-DAG: csetm {{.*}}, ne -; CHECK-CVT-DAG: csetm {{.*}}, ne -; CHECK-CVT-DAG: csetm {{.*}}, ne +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmeq +; CHECK-CVT: mvn +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_une: ; CHECK-FP16-NOT: fcvt @@ -325,22 +319,14 @@ define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ueq: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], eq -; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], eq -; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], eq -; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], eq -; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, vc -; CHECK-CVT-DAG: csinv 
{{.*}}, [[REG2]], wzr, vc -; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, vc -; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, vc +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ueq: ; CHECK-FP16-NOT: fcvt @@ -359,18 +345,12 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ugt: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, hi -; CHECK-CVT-DAG: csetm {{.*}}, hi -; CHECK-CVT-DAG: csetm {{.*}}, hi -; CHECK-CVT-DAG: csetm {{.*}}, hi +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ugt: ; CHECK-FP16-NOT: fcvt @@ -389,18 +369,12 @@ define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_uge: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, pl -; CHECK-CVT-DAG: csetm {{.*}}, pl -; CHECK-CVT-DAG: csetm {{.*}}, pl -; CHECK-CVT-DAG: csetm {{.*}}, pl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_uge: ; CHECK-FP16-NOT: fcvt @@ -419,18 +393,12 @@ define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ult: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; 
CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, lt -; CHECK-CVT-DAG: csetm {{.*}}, lt -; CHECK-CVT-DAG: csetm {{.*}}, lt -; CHECK-CVT-DAG: csetm {{.*}}, lt +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ult: ; CHECK-FP16-NOT: fcvt @@ -449,18 +417,12 @@ define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ule: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, le -; CHECK-CVT-DAG: csetm {{.*}}, le -; CHECK-CVT-DAG: csetm {{.*}}, le -; CHECK-CVT-DAG: csetm {{.*}}, le +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ule: ; CHECK-FP16-NOT: fcvt @@ -479,18 +441,14 @@ define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_uno: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, vs -; CHECK-CVT-DAG: csetm {{.*}}, vs -; CHECK-CVT-DAG: csetm {{.*}}, vs -; CHECK-CVT-DAG: csetm {{.*}}, vs +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_uno: ; CHECK-FP16-NOT: fcvt @@ -509,22 +467,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_one: -; 
CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], mi -; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], mi -; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], mi -; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], mi -; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, le -; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, le -; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, le -; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, le +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_one: ; CHECK-FP16-NOT: fcvt @@ -543,18 +492,11 @@ define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_oeq: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, eq -; CHECK-CVT-DAG: csetm {{.*}}, eq -; CHECK-CVT-DAG: csetm {{.*}}, eq -; CHECK-CVT-DAG: csetm {{.*}}, eq +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmeq +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_oeq: ; CHECK-FP16-NOT: fcvt @@ -573,18 +515,11 @@ define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ogt: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, gt -; CHECK-CVT-DAG: csetm {{.*}}, gt -; CHECK-CVT-DAG: csetm {{.*}}, gt -; CHECK-CVT-DAG: csetm {{.*}}, gt +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; 
CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ogt: ; CHECK-FP16-NOT: fcvt @@ -603,18 +538,11 @@ define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_oge: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, ge -; CHECK-CVT-DAG: csetm {{.*}}, ge -; CHECK-CVT-DAG: csetm {{.*}}, ge -; CHECK-CVT-DAG: csetm {{.*}}, ge +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_oge: ; CHECK-FP16-NOT: fcvt @@ -633,18 +561,11 @@ define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_olt: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, mi -; CHECK-CVT-DAG: csetm {{.*}}, mi -; CHECK-CVT-DAG: csetm {{.*}}, mi -; CHECK-CVT-DAG: csetm {{.*}}, mi +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_olt: ; CHECK-FP16-NOT: fcvt @@ -663,18 +584,11 @@ define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ole: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, ls -; CHECK-CVT-DAG: csetm {{.*}}, ls -; CHECK-CVT-DAG: csetm {{.*}}, ls -; CHECK-CVT-DAG: csetm {{.*}}, ls +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; 
CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ole: ; CHECK-FP16-NOT: fcvt @@ -693,18 +607,13 @@ define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ord: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, vc -; CHECK-CVT-DAG: csetm {{.*}}, vc -; CHECK-CVT-DAG: csetm {{.*}}, vc -; CHECK-CVT-DAG: csetm {{.*}}, vc +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ord: ; CHECK-FP16-NOT: fcvt diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll index 5215260c23a2..ccf8c6874975 100644 --- a/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK -; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16 --check-prefix=CHECK +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-CVT --check-prefix=CHECK +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-FP16 --check-prefix=CHECK define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) { entry: diff --git a/test/CodeGen/AArch64/fp16-vector-load-store.ll b/test/CodeGen/AArch64/fp16-vector-load-store.ll index b71b39f1acd1..1d1794abc306 100644 --- a/test/CodeGen/AArch64/fp16-vector-load-store.ll +++ 
b/test/CodeGen/AArch64/fp16-vector-load-store.ll @@ -88,6 +88,45 @@ entry: ret void } +define void @store_lane0_64(half* nocapture %a, <4 x half> %b) #1 { +; CHECK-LABEL: store_lane0_64: +; CHECK: str h0, [x0] +entry: + %0 = extractelement <4 x half> %b, i32 0 + store half %0, half* %a, align 2 + ret void +} + +define void @storeu_lane0_64(half* nocapture %a, <4 x half> %b) #1 { +; CHECK-LABEL: storeu_lane0_64: +; CHECK: stur h0, [x{{[0-9]+}}, #-2] +entry: + %0 = getelementptr half, half* %a, i64 -1 + %1 = extractelement <4 x half> %b, i32 0 + store half %1, half* %0, align 2 + ret void +} + +define void @storero_lane_64(half* nocapture %a, <4 x half> %b, i64 %c) #1 { +; CHECK-LABEL: storero_lane_64: +; CHECK: st1 { v0.h }[2], [x{{[0-9]+}}] +entry: + %0 = getelementptr half, half* %a, i64 %c + %1 = extractelement <4 x half> %b, i32 2 + store half %1, half* %0, align 2 + ret void +} + +define void @storero_lane0_64(half* nocapture %a, <4 x half> %b, i64 %c) #1 { +; CHECK-LABEL: storero_lane0_64: +; CHECK: str h0, [x0, x1, lsl #1] +entry: + %0 = getelementptr half, half* %a, i64 %c + %1 = extractelement <4 x half> %b, i32 0 + store half %1, half* %0, align 2 + ret void +} + ; Store from one lane of v8f16 define void @store_lane_128(half* nocapture %a, <8 x half> %b) #1 { ; CHECK-LABEL: store_lane_128: @@ -98,6 +137,45 @@ entry: ret void } +define void @store_lane0_128(half* nocapture %a, <8 x half> %b) #1 { +; CHECK-LABEL: store_lane0_128: +; CHECK: str h0, [x0] +entry: + %0 = extractelement <8 x half> %b, i32 0 + store half %0, half* %a, align 2 + ret void +} + +define void @storeu_lane0_128(half* nocapture %a, <8 x half> %b) #1 { +; CHECK-LABEL: storeu_lane0_128: +; CHECK: stur h0, [x{{[0-9]+}}, #-2] +entry: + %0 = getelementptr half, half* %a, i64 -1 + %1 = extractelement <8 x half> %b, i32 0 + store half %1, half* %0, align 2 + ret void +} + +define void @storero_lane_128(half* nocapture %a, <8 x half> %b, i64 %c) #1 { +; CHECK-LABEL: storero_lane_128: +; CHECK: 
st1 { v0.h }[4], [x{{[0-9]+}}] +entry: + %0 = getelementptr half, half* %a, i64 %c + %1 = extractelement <8 x half> %b, i32 4 + store half %1, half* %0, align 2 + ret void +} + +define void @storero_lane0_128(half* nocapture %a, <8 x half> %b, i64 %c) #1 { +; CHECK-LABEL: storero_lane0_128: +; CHECK: str h0, [x0, x1, lsl #1] +entry: + %0 = getelementptr half, half* %a, i64 %c + %1 = extractelement <8 x half> %b, i32 0 + store half %1, half* %0, align 2 + ret void +} + ; NEON intrinsics - (de-)interleaving loads and stores declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>*) declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>*) diff --git a/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/test/CodeGen/AArch64/fp16_intrinsic_lane.ll new file mode 100644 index 000000000000..9e276cdfbf16 --- /dev/null +++ b/test/CodeGen/AArch64/fp16_intrinsic_lane.ll @@ -0,0 +1,331 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare half @llvm.aarch64.neon.fmulx.f16(half, half) +declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) +declare half @llvm.fma.f16(half, half, half) #1 + +define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfma_lane_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer + %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a) + ret <4 x half> %fmla3 +} + +define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: 
t_vfmaq_lane_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer + %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a) + ret <8 x half> %fmla3 +} + +define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfma_laneq_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v1.4h, v2.4h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a) + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmaq_laneq_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v1.8h, v2.8h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a) + ret <8 x half> %0 +} + +define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) { +; CHECK-LABEL: t_vfma_n_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <4 x half> undef, half %c, i32 0 + %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4 + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) { +; CHECK-LABEL: t_vfmaq_n_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <8 x half> undef, half %c, i32 0 + %vecinit7 = shufflevector <8 x half> 
%vecinit, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4 + ret <8 x half> %0 +} + +define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmah_lane_f16: +; CHECK: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %extract = extractelement <4 x half> %c, i32 0 + %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a) + ret half %0 +} + +define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmah_laneq_f16: +; CHECK: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %extract = extractelement <8 x half> %c, i32 0 + %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a) + ret half %0 +} + +define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfms_lane_f16: +; CHECK: fneg v1.4h, v1.4h +; CHECK-NEXT: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b + %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer + %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a) + ret <4 x half> %fmla3 +} + +define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmsq_lane_f16: +; CHECK: fneg v1.8h, v1.8h +; CHECK-NEXT: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b + %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer + %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a) + ret <8 x half> %fmla3 +} + 
+define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfms_laneq_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmls v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b + %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a) + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmsq_laneq_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmls v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b + %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a) + ret <8 x half> %0 +} + +define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) { +; CHECK-LABEL: t_vfms_n_f16: +; CHECK: fneg v1.4h, v1.4h +; CHECK-NEXT: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b + %vecinit = insertelement <4 x half> undef, half %c, i32 0 + %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4 + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) { +; CHECK-LABEL: t_vfmsq_n_f16: +; CHECK: fneg v1.8h, v1.8h +; CHECK-NEXT: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %sub = fsub <8 x half> <half 
0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b + %vecinit = insertelement <8 x half> undef, half %c, i32 0 + %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4 + ret <8 x half> %0 +} + +define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmsh_lane_f16: +; CHECK: fneg h1, h1 +; CHECK: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %0 = fsub half 0xH8000, %b + %extract = extractelement <4 x half> %c, i32 0 + %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a) + ret half %1 +} + +define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmsh_laneq_f16: +; CHECK: fneg h1, h1 +; CHECK-NEXT: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %0 = fsub half 0xH8000, %b + %extract = extractelement <8 x half> %c, i32 0 + %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a) + ret half %1 +} + +define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmul_laneq_f16: +; CHECK: fmul v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer + %mul = fmul <4 x half> %shuffle, %a + ret <4 x half> %mul +} + +define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulq_laneq_f16: +; CHECK: fmul v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %mul = fmul <8 x half> %shuffle, %a + ret <8 x half> %mul +} + +define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vmulh_lane_f16: +; CHECK: fmul h0, h0, v1.h[0] +; CHECK-NEXT: ret +entry: + %0 = 
extractelement <4 x half> %c, i32 0 + %1 = fmul half %0, %a + ret half %1 +} + +define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vmulh_laneq_f16: +; CHECK: fmul h0, h0, v1.h[0] +; CHECK-NEXT: ret +entry: + %0 = extractelement <8 x half> %c, i32 0 + %1 = fmul half %0, %a + ret half %1 +} + +define dso_local half @t_vmulx_f16(half %a, half %b) { +; CHECK-LABEL: t_vmulx_f16: +; CHECK: fmulx h0, h0, h1 +; CHECK-NEXT: ret +entry: + %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b) + ret half %fmulx.i +} + +define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulxh_lane_f16: +; CHECK: fmulx h0, h0, v1.h[3] +; CHECK-NEXT: ret +entry: + %extract = extractelement <4 x half> %b, i32 3 + %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract) + ret half %fmulx.i +} + +define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulx_lane_f16: +; CHECK: fmulx v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4 + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulxq_lane_f16: +; CHECK: fmulx v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4 + ret <8 x half> %vmulx2.i +} + +define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulx_laneq_f16: +; CHECK: fmulx v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> 
zeroinitializer + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4 + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulxq_laneq_f16: +; CHECK: fmulx v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4 + ret <8 x half> %vmulx2.i +} + +define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulxh_laneq_f16: +; CHECK: fmulx h0, h0, v1.h[7] +; CHECK-NEXT: ret +entry: + %extract = extractelement <8 x half> %b, i32 7 + %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract) + ret half %fmulx.i +} + +define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) { +; CHECK-LABEL: t_vmulx_n_f16: +; CHECK: dup v1.4h, v1.h[0] +; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <4 x half> undef, half %c, i32 0 + %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4 + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) { +; CHECK-LABEL: t_vmulxq_n_f16: +; CHECK: dup v1.8h, v1.h[0] +; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <8 x half> undef, half %c, i32 0 + %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4 + ret <8 x half> %vmulx2.i +} diff --git a/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll new 
file mode 100644 index 000000000000..c8333b253ec4 --- /dev/null +++ b/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll @@ -0,0 +1,318 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtps.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtps.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtns.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtns.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtms.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtms.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtau.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtau.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtas.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtas.i32.f16(half) +declare half @llvm.aarch64.neon.frsqrte.f16(half) +declare half @llvm.aarch64.neon.frecpx.f16(half) +declare half @llvm.aarch64.neon.frecpe.f16(half) + +define dso_local i16 @t2(half %a) { +; CHECK-LABEL: t2: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, eq +; CHECK-NEXT: ret +entry: + %0 = fcmp oeq half %a, 0xH0000 + %vceqz = sext i1 %0 to i16 + ret i16 %vceqz +} + +define dso_local i16 @t3(half %a) { +; CHECK-LABEL: t3: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, ge +; CHECK-NEXT: ret +entry: + %0 = fcmp oge half %a, 0xH0000 + %vcgez = sext i1 %0 to i16 + ret i16 %vcgez +} + +define dso_local i16 @t4(half %a) { +; CHECK-LABEL: t4: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, gt +; CHECK-NEXT: ret +entry: + %0 = fcmp ogt half %a, 0xH0000 + %vcgtz = sext i1 %0 to i16 + ret i16 %vcgtz +} + +define dso_local i16 @t5(half %a) { +; CHECK-LABEL: t5: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, ls +; 
CHECK-NEXT: ret +entry: + %0 = fcmp ole half %a, 0xH0000 + %vclez = sext i1 %0 to i16 + ret i16 %vclez +} + +define dso_local i16 @t6(half %a) { +; CHECK-LABEL: t6: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, mi +; CHECK-NEXT: ret +entry: + %0 = fcmp olt half %a, 0xH0000 + %vcltz = sext i1 %0 to i16 + ret i16 %vcltz +} + +define dso_local half @t8(i32 %a) { +; CHECK-LABEL: t8: +; CHECK: scvtf h0, w0 +; CHECK-NEXT: ret +entry: + %0 = sitofp i32 %a to half + ret half %0 +} + +define dso_local half @t9(i64 %a) { +; CHECK-LABEL: t9: +; CHECK: scvtf h0, x0 +; CHECK-NEXT: ret +entry: + %0 = sitofp i64 %a to half + ret half %0 +} + +define dso_local half @t12(i64 %a) { +; CHECK-LABEL: t12: +; CHECK: ucvtf h0, x0 +; CHECK-NEXT: ret +entry: + %0 = uitofp i64 %a to half + ret half %0 +} + +define dso_local i16 @t13(half %a) { +; CHECK-LABEL: t13: +; CHECK: fcvtzs w0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptosi half %a to i16 + ret i16 %0 +} + +define dso_local i64 @t15(half %a) { +; CHECK-LABEL: t15: +; CHECK: fcvtzs x0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptosi half %a to i64 + ret i64 %0 +} + +define dso_local i16 @t16(half %a) { +; CHECK-LABEL: t16: +; CHECK: fcvtzs w0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptoui half %a to i16 + ret i16 %0 +} + +define dso_local i64 @t18(half %a) { +; CHECK-LABEL: t18: +; CHECK: fcvtzu x0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptoui half %a to i64 + ret i64 %0 +} + +define dso_local i16 @t19(half %a) { +; CHECK-LABEL: t19: +; CHECK: fcvtas w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t21(half %a) { +; CHECK-LABEL: t21: +; CHECK: fcvtas x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a) + ret i64 %vcvtah_s64_f16 +} + +define dso_local i16 @t22(half %a) { +; CHECK-LABEL: t22: +; CHECK: fcvtau w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 
@llvm.aarch64.neon.fcvtau.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t24(half %a) { +; CHECK-LABEL: t24: +; CHECK: fcvtau x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtah_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a) + ret i64 %vcvtah_u64_f16 +} + +define dso_local i16 @t25(half %a) { +; CHECK-LABEL: t25: +; CHECK: fcvtms w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t27(half %a) { +; CHECK-LABEL: t27: +; CHECK: fcvtms x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtmh_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a) + ret i64 %vcvtmh_s64_f16 +} + +define dso_local i16 @t28(half %a) { +; CHECK-LABEL: t28: +; CHECK: fcvtmu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t30(half %a) { +; CHECK-LABEL: t30: +; CHECK: fcvtmu x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtmh_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a) + ret i64 %vcvtmh_u64_f16 +} + +define dso_local i16 @t31(half %a) { +; CHECK-LABEL: t31: +; CHECK: fcvtns w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t33(half %a) { +; CHECK-LABEL: t33: +; CHECK: fcvtns x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtnh_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a) + ret i64 %vcvtnh_s64_f16 +} + +define dso_local i16 @t34(half %a) { +; CHECK-LABEL: t34: +; CHECK: fcvtnu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t36(half %a) { +; CHECK-LABEL: t36: +; CHECK: fcvtnu x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtnh_u64_f16 = tail 
call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a) + ret i64 %vcvtnh_u64_f16 +} + +define dso_local i16 @t37(half %a) { +; CHECK-LABEL: t37: +; CHECK: fcvtps w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t39(half %a) { +; CHECK-LABEL: t39: +; CHECK: fcvtps x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtph_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a) + ret i64 %vcvtph_s64_f16 +} + +define dso_local i16 @t40(half %a) { +; CHECK-LABEL: t40: +; CHECK: fcvtpu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t42(half %a) { +; CHECK-LABEL: t42: +; CHECK: fcvtpu x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtph_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a) + ret i64 %vcvtph_u64_f16 +} + +define dso_local half @t44(half %a) { +; CHECK-LABEL: t44: +; CHECK: frecpe h0, h0 +; CHECK-NEXT: ret +entry: + %vrecpeh_f16 = tail call half @llvm.aarch64.neon.frecpe.f16(half %a) + ret half %vrecpeh_f16 +} + +define dso_local half @t45(half %a) { +; CHECK-LABEL: t45: +; CHECK: frecpx h0, h0 +; CHECK-NEXT: ret +entry: + %vrecpxh_f16 = tail call half @llvm.aarch64.neon.frecpx.f16(half %a) + ret half %vrecpxh_f16 +} + +define dso_local half @t53(half %a) { +; CHECK-LABEL: t53: +; CHECK: frsqrte h0, h0 +; CHECK-NEXT: ret +entry: + %vrsqrteh_f16 = tail call half @llvm.aarch64.neon.frsqrte.f16(half %a) + ret half %vrsqrteh_f16 +} diff --git a/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll new file mode 100644 index 000000000000..13a18b10e9f9 --- /dev/null +++ b/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll @@ -0,0 +1,320 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare half @llvm.aarch64.sisd.fabd.f16(half, half) 
+declare half @llvm.aarch64.neon.fmax.f16(half, half) +declare half @llvm.aarch64.neon.fmin.f16(half, half) +declare half @llvm.aarch64.neon.frsqrts.f16(half, half) +declare half @llvm.aarch64.neon.frecps.f16(half, half) +declare half @llvm.aarch64.neon.fmulx.f16(half, half) +declare half @llvm.fabs.f16(half) + +define dso_local half @t_vabdh_f16(half %a, half %b) { +; CHECK-LABEL: t_vabdh_f16: +; CHECK: fabd h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vabdh_f16 = tail call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b) + ret half %vabdh_f16 +} + +define dso_local half @t_vabdh_f16_from_fsub_fabs(half %a, half %b) { +; CHECK-LABEL: t_vabdh_f16_from_fsub_fabs: +; CHECK: fabd h0, h0, h1 +; CHECK-NEXT: ret +entry: + %sub = fsub half %a, %b + %abs = tail call half @llvm.fabs.f16(half %sub) + ret half %abs +} + +define dso_local i16 @t_vceqh_f16(half %a, half %b) { +; CHECK-LABEL: t_vceqh_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, eq +; CHECK-NEXT: ret +entry: + %0 = fcmp oeq half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vcgeh_f16(half %a, half %b) { +; CHECK-LABEL: t_vcgeh_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, ge +; CHECK-NEXT: ret +entry: + %0 = fcmp oge half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vcgth_f16(half %a, half %b) { +; CHECK-LABEL: t_vcgth_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, gt +; CHECK-NEXT: ret +entry: + %0 = fcmp ogt half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vcleh_f16(half %a, half %b) { +; CHECK-LABEL: t_vcleh_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, ls +; CHECK-NEXT: ret +entry: + %0 = fcmp ole half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vclth_f16(half %a, half %b) { +; CHECK-LABEL: t_vclth_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, mi +; CHECK-NEXT: ret +entry: + %0 = fcmp olt half %a, %b + %vcmpd = sext i1 %0 to i16 + 
ret i16 %vcmpd +} + +define dso_local half @t_vmaxh_f16(half %a, half %b) { +; CHECK-LABEL: t_vmaxh_f16: +; CHECK: fmax h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vmax = tail call half @llvm.aarch64.neon.fmax.f16(half %a, half %b) + ret half %vmax +} + +define dso_local half @t_vminh_f16(half %a, half %b) { +; CHECK-LABEL: t_vminh_f16: +; CHECK: fmin h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vmin = tail call half @llvm.aarch64.neon.fmin.f16(half %a, half %b) + ret half %vmin +} + +define dso_local half @t_vmulxh_f16(half %a, half %b) { +; CHECK-LABEL: t_vmulxh_f16: +; CHECK: fmulx h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vmulxh_f16 = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b) + ret half %vmulxh_f16 +} + +define dso_local half @t_vrecpsh_f16(half %a, half %b) { +; CHECK-LABEL: t_vrecpsh_f16: +; CHECK: frecps h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vrecps = tail call half @llvm.aarch64.neon.frecps.f16(half %a, half %b) + ret half %vrecps +} + +define dso_local half @t_vrsqrtsh_f16(half %a, half %b) { +; CHECK-LABEL: t_vrsqrtsh_f16: +; CHECK: frsqrts h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vrsqrtsh_f16 = tail call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b) + ret half %vrsqrtsh_f16 +} + +declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32, i32) #1 +declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64, i32) #1 +declare i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half, i32) #1 +declare i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half, i32) #1 +declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1 +declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1 + +define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) { +; CHECK-LABEL: test_vcvth_n_f16_s16_1: +; CHECK: fmov s0, w[[wReg:[0-9]+]] +; CHECK-NEXT: scvtf h0, h0, #1 +; CHECK-NEXT: ret +entry: + %sext = sext i16 %a to i32 + %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1) + ret half %fcvth_n +} + +define dso_local half 
@test_vcvth_n_f16_s16_16(i16 %a) { +; CHECK-LABEL: test_vcvth_n_f16_s16_16: +; CHECK: fmov s0, w[[wReg:[0-9]+]] +; CHECK-NEXT: scvtf h0, h0, #16 +; CHECK-NEXT: ret +entry: + %sext = sext i16 %a to i32 + %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16) + ret half %fcvth_n +} + +define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) { +; CHECK-LABEL: test_vcvth_n_f16_s32_1: +; CHECK: fmov s0, w0 +; CHECK-NEXT: scvtf h0, h0, #1 +; CHECK-NEXT: ret +entry: + %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1) + ret half %vcvth_n_f16_s32 +} + +define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) { +; CHECK-LABEL: test_vcvth_n_f16_s32_16: +; CHECK: fmov s0, w0 +; CHECK-NEXT: scvtf h0, h0, #16 +; CHECK-NEXT: ret +entry: + %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16) + ret half %vcvth_n_f16_s32 +} + +define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) { +; CHECK-LABEL: test_vcvth_n_s16_f16_1: +; CHECK: fcvtzs h0, h0, #1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1) + %0 = trunc i32 %fcvth_n to i16 + ret i16 %0 +} + +define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) { +; CHECK-LABEL: test_vcvth_n_s16_f16_16: +; CHECK: fcvtzs h0, h0, #16 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16) + %0 = trunc i32 %fcvth_n to i16 + ret i16 %0 +} + +define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) { +; CHECK-LABEL: test_vcvth_n_s32_f16_1: +; CHECK: fcvtzs h0, h0, #1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1) + ret i32 %vcvth_n_s32_f16 +} + +define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) { +; CHECK-LABEL: test_vcvth_n_s32_f16_16: +; CHECK: fcvtzs h0, h0, #16 +; CHECK-NEXT: fmov w0, s0 +; 
CHECK-NEXT: ret +entry: + %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16) + ret i32 %vcvth_n_s32_f16 +} + +define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) { +; CHECK-LABEL: test_vcvth_n_s64_f16_1: +; CHECK: fcvtzs h0, h0, #1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1) + ret i64 %vcvth_n_s64_f16 +} + +define dso_local i64 @test_vcvth_n_s64_f16_32(half %a) { +; CHECK-LABEL: test_vcvth_n_s64_f16_32: +; CHECK: fcvtzs h0, h0, #32 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 32) + ret i64 %vcvth_n_s64_f16 +} + +define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) { +; CHECK-LABEL: test_vcvth_n_f16_u16_1: +; CHECK: ucvtf h0, h0, #1 +; CHECK-NEXT: ret +entry: + %0 = zext i16 %a to i32 + %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1) + ret half %fcvth_n +} + +define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) { +; CHECK-LABEL: test_vcvth_n_f16_u16_16: +; CHECK: ucvtf h0, h0, #16 +; CHECK-NEXT: ret +entry: + %0 = zext i16 %a to i32 + %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16) + ret half %fcvth_n +} + +define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) { +; CHECK-LABEL: test_vcvth_n_f16_u32_1: +; CHECK: fmov s0, w0 +; CHECK-NEXT: ucvtf h0, h0, #1 +; CHECK-NEXT: ret +entry: + %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1) + ret half %vcvth_n_f16_u32 +} + +define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) { +; CHECK-LABEL: test_vcvth_n_f16_u32_16: +; CHECK: ucvtf h0, h0, #16 +; CHECK-NEXT: ret +entry: + %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16) + ret half %vcvth_n_f16_u32 +} + +define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) { +; CHECK-LABEL: test_vcvth_n_u16_f16_1: +; 
CHECK: fcvtzu h0, h0, #1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1) + %0 = trunc i32 %fcvth_n to i16 + ret i16 %0 +} + +define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) { +; CHECK-LABEL: test_vcvth_n_u16_f16_16: +; CHECK: fcvtzu h0, h0, #16 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16) + %0 = trunc i32 %fcvth_n to i16 + ret i16 %0 +} + +define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) { +; CHECK-LABEL: test_vcvth_n_u32_f16_1: +; CHECK: fcvtzu h0, h0, #1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1) + ret i32 %vcvth_n_u32_f16 +} + +define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) { +; CHECK-LABEL: test_vcvth_n_u32_f16_16: +; CHECK: fcvtzu h0, h0, #16 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16) + ret i32 %vcvth_n_u32_f16 +} diff --git a/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll b/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll new file mode 100644 index 000000000000..d43834d05de3 --- /dev/null +++ b/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare half @llvm.fma.f16(half, half, half) + +define dso_local half @t_vfmah_f16(half %a, half %b, half %c) { +; CHECK-LABEL: t_vfmah_f16: +; CHECK: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %0 = tail call half @llvm.fma.f16(half %b, half %c, half %a) + ret half %0 +} + diff --git a/test/CodeGen/AArch64/fp16_intrinsic_vector_1op.ll b/test/CodeGen/AArch64/fp16_intrinsic_vector_1op.ll new file mode 100644 index 000000000000..becbbdd45434 --- /dev/null +++ b/test/CodeGen/AArch64/fp16_intrinsic_vector_1op.ll @@ -0,0 +1,42 
@@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>) +declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) +declare <4 x half> @llvm.sqrt.v4f16(<4 x half>) +declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) + +define dso_local <4 x half> @t_vrndi_f16(<4 x half> %a) { +; CHECK-LABEL: t_vrndi_f16: +; CHECK: frinti v0.4h, v0.4h +; CHECK-NEXT: ret +entry: + %vrndi1.i = tail call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %a) + ret <4 x half> %vrndi1.i +} + +define dso_local <8 x half> @t_vrndiq_f16(<8 x half> %a) { +; CHECK-LABEL: t_vrndiq_f16: +; CHECK: frinti v0.8h, v0.8h +; CHECK-NEXT: ret +entry: + %vrndi1.i = tail call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a) + ret <8 x half> %vrndi1.i +} + +define dso_local <4 x half> @t_vsqrt_f16(<4 x half> %a) { +; CHECK-LABEL: t_vsqrt_f16: +; CHECK: fsqrt v0.4h, v0.4h +; CHECK-NEXT: ret +entry: + %vsqrt.i = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a) + ret <4 x half> %vsqrt.i +} + +define dso_local <8 x half> @t_vsqrtq_f16(<8 x half> %a) { +; CHECK-LABEL: t_vsqrtq_f16: +; CHECK: fsqrt v0.8h, v0.8h +; CHECK-NEXT: ret +entry: + %vsqrt.i = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) + ret <8 x half> %vsqrt.i +} diff --git a/test/CodeGen/AArch64/fp16_intrinsic_vector_2op.ll b/test/CodeGen/AArch64/fp16_intrinsic_vector_2op.ll new file mode 100644 index 000000000000..1674d8627920 --- /dev/null +++ b/test/CodeGen/AArch64/fp16_intrinsic_vector_2op.ll @@ -0,0 +1,122 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half>, <4 x half>) +declare 
<8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.fabs.v4f16(<4 x half>) +declare <8 x half> @llvm.fabs.v8f16(<8 x half>) + +define dso_local <4 x half> @t_vdiv_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vdiv_f16: +; CHECK: fdiv v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %div.i = fdiv <4 x half> %a, %b + ret <4 x half> %div.i +} + +define dso_local <8 x half> @t_vdivq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vdivq_f16: +; CHECK: fdiv v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %div.i = fdiv <8 x half> %a, %b + ret <8 x half> %div.i +} + +define dso_local <4 x half> @t_vmulx_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vmulx_f16: +; CHECK: fmulx v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vmulxq_f16: +; CHECK: fmulx v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vmulx2.i +} + +define dso_local <4 x half> @t_vpminnm_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vpminnm_f16: +; CHECK: fminnmp v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vpminnm2.i = tail call <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vpminnm2.i +} + +define dso_local <8 x half> @t_vpminnmq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vpminnmq_f16: +; CHECK: fminnmp v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vpminnm2.i = tail call <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vpminnm2.i +} + +define dso_local <4 x 
half> @t_vpmaxnm_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vpmaxnm_f16: +; CHECK: fmaxnmp v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vpmaxnm2.i = tail call <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vpmaxnm2.i +} + +define dso_local <8 x half> @t_vpmaxnmq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vpmaxnmq_f16: +; CHECK: fmaxnmp v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vpmaxnm2.i = tail call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vpmaxnm2.i +} + +define dso_local <4 x half> @t_vabd_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vabd_f16: +; CHECK: fabd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vabdh_f16 = tail call <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vabdh_f16 +} + +define dso_local <8 x half> @t_vabdq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vabdq_f16: +; CHECK: fabd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vabdh_f16 = tail call <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vabdh_f16 +} + +define dso_local <4 x half> @t_vabd_f16_from_fsub_fabs(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vabd_f16_from_fsub_fabs: +; CHECK: fabd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %sub = fsub <4 x half> %a, %b + %abs = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %sub) + ret <4 x half> %abs +} + +define dso_local <8 x half> @t_vabdq_f16_from_fsub_fabs(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vabdq_f16_from_fsub_fabs: +; CHECK: fabd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %sub = fsub <8 x half> %a, %b + %abs = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %sub) + ret <8 x half> %abs +} diff --git a/test/CodeGen/AArch64/fp16_intrinsic_vector_3op.ll b/test/CodeGen/AArch64/fp16_intrinsic_vector_3op.ll new file mode 100644 index 000000000000..c8a33a6cf5c2 --- /dev/null +++ 
b/test/CodeGen/AArch64/fp16_intrinsic_vector_3op.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) + +define dso_local <4 x half> @t_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: t_vfma_f16: +; CHECK: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: t_vfmaq_f16: +; CHECK: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a) + ret <8 x half> %0 +} diff --git a/test/CodeGen/AArch64/ftrunc.ll b/test/CodeGen/AArch64/ftrunc.ll new file mode 100644 index 000000000000..c7bf514e902b --- /dev/null +++ b/test/CodeGen/AArch64/ftrunc.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s + +define float @trunc_unsigned_f32(float %x) #0 { +; CHECK-LABEL: trunc_unsigned_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: ret + %i = fptoui float %x to i32 + %r = uitofp i32 %i to float + ret float %r +} + +define double @trunc_unsigned_f64(double %x) #0 { +; CHECK-LABEL: trunc_unsigned_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: ret + %i = fptoui double %x to i64 + %r = uitofp i64 %i to double + ret double %r +} + +define float @trunc_signed_f32(float %x) #0 { +; CHECK-LABEL: trunc_signed_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: ret + %i = fptosi float %x to i32 + %r = sitofp i32 %i to float + ret float %r +} + +define double @trunc_signed_f64(double %x) #0 { +; CHECK-LABEL: 
trunc_signed_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: ret + %i = fptosi double %x to i64 + %r = sitofp i64 %i to double + ret double %r +} + +attributes #0 = { "no-signed-zeros-fp-math"="true" } + diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index cf6545dab385..3e6a8bb2c8ce 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -164,11 +164,11 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) { ; CHECK-LABEL: check_i128_regalign store i128 %val1, i128* @var128 -; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK-DAG: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK-DAG: stp x2, x3, [x[[VAR128]]] ret i64 %val2 -; CHECK: mov x0, x4 +; CHECK-DAG: mov x0, x4 } define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, @@ -186,11 +186,11 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define i32 @test_extern() { ; CHECK-LABEL: test_extern: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 undef, i32 undef, i1 0) ; CHECK: bl memcpy ret i32 0 } diff --git a/test/CodeGen/AArch64/funnel-shift-rot.ll b/test/CodeGen/AArch64/funnel-shift-rot.ll new file mode 100644 index 000000000000..af612eafd333 --- /dev/null +++ b/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 
@llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; When first 2 operands match, it's a rotate. + +define i8 @rotl_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotl_i8_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ubfx w8, w0, #5, #3 +; CHECK-NEXT: bfi w8, w0, #3, #29 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i64 @rotl_i64_const_shift(i64 %x) { +; CHECK-LABEL: rotl_i64_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ror x0, x0, #61 +; CHECK-NEXT: ret + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) + ret i64 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). + +define i16 @rotl_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotl_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w10, wzr, #0x10 +; CHECK-NEXT: sub w10, w10, w1 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w9, w1, #0xf +; CHECK-NEXT: and w10, w10, #0xf +; CHECK-NEXT: lsl w9, w0, w9 +; CHECK-NEXT: lsr w8, w8, w10 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotl_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotl_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x20 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: ror w0, w0, w8 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotl_i64(i64 %x, i64 %z) { +; CHECK-LABEL: rotl_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w9, wzr, #0x40 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: lsr x9, x0, x9 +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} 
+ +; Vector rotate. + +define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { +; CHECK-LABEL: rotl_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.4s, #31 +; CHECK-NEXT: movi v3.4s, #32 +; CHECK-NEXT: and v4.16b, v1.16b, v2.16b +; CHECK-NEXT: sub v1.4s, v3.4s, v1.4s +; CHECK-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: ushl v3.4s, v0.4s, v4.4s +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. + +define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) { +; CHECK-LABEL: rotl_v4i32_rotl_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ushr v1.4s, v0.4s, #29 +; CHECK-NEXT: shl v0.4s, v0.4s, #3 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %f +} + +; Repeat everything for funnel shift right. + +; When first 2 operands match, it's a rotate. + +define i8 @rotr_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotr_i8_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ubfx w8, w0, #3, #5 +; CHECK-NEXT: bfi w8, w0, #5, #27 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i32 @rotr_i32_const_shift(i32 %x) { +; CHECK-LABEL: rotr_i32_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ror w0, w0, #3 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) + ret i32 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). 
+ +define i16 @rotr_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w9, w1, #0xf +; CHECK-NEXT: orr w10, wzr, #0x10 +; CHECK-NEXT: lsr w8, w8, w9 +; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: and w9, w9, #0xf +; CHECK-NEXT: lsl w9, w0, w9 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotr_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ror w0, w0, w1 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotr_i64(i64 %x, i64 %z) { +; CHECK-LABEL: rotr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ror x0, x0, x1 +; CHECK-NEXT: ret + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. + +define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { +; CHECK-LABEL: rotr_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.4s, #31 +; CHECK-NEXT: movi v3.4s, #32 +; CHECK-NEXT: and v4.16b, v1.16b, v2.16b +; CHECK-NEXT: sub v1.4s, v3.4s, v1.4s +; CHECK-NEXT: neg v3.4s, v4.4s +; CHECK-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-NEXT: ushl v2.4s, v0.4s, v3.4s +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
+ +define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) { +; CHECK-LABEL: rotr_v4i32_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ushr v1.4s, v0.4s, #3 +; CHECK-NEXT: shl v0.4s, v0.4s, #29 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %f +} + +define i32 @rotl_i32_shift_by_bitwidth(i32 %x) { +; CHECK-LABEL: rotl_i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32) + ret i32 %f +} + +define i32 @rotr_i32_shift_by_bitwidth(i32 %x) { +; CHECK-LABEL: rotr_i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32) + ret i32 %f +} + +define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) { +; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) + ret <4 x i32> %f +} + +define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) { +; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) + ret <4 x i32> %f +} + diff --git a/test/CodeGen/AArch64/funnel-shift.ll b/test/CodeGen/AArch64/funnel-shift.ll new file mode 100644 index 000000000000..d91a9c54cef9 --- /dev/null +++ b/test/CodeGen/AArch64/funnel-shift.ll @@ -0,0 +1,311 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) 
+declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; General case - all operands can be variables. + +define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshl_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w9, wzr, #0x20 +; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: lsl w8, w0, w2 +; CHECK-NEXT: lsr w9, w1, w9 +; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: tst w2, #0x1f +; CHECK-NEXT: csel w0, w0, w8, eq +; CHECK-NEXT: ret + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. +declare i37 @llvm.fshl.i37(i37, i37, i37) +define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-LABEL: fshl_i37: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x11, #31883 +; CHECK-NEXT: mov w10, #37 +; CHECK-NEXT: movk x11, #3542, lsl #16 +; CHECK-NEXT: movk x11, #51366, lsl #32 +; CHECK-NEXT: sub x12, x10, x2 +; CHECK-NEXT: and x8, x2, #0x1fffffffff +; CHECK-NEXT: movk x11, #56679, lsl #48 +; CHECK-NEXT: and x12, x12, #0x1fffffffff +; CHECK-NEXT: umulh x13, x8, x11 +; CHECK-NEXT: umulh x11, x12, x11 +; CHECK-NEXT: lsr x13, x13, #5 +; CHECK-NEXT: lsr x11, x11, #5 +; CHECK-NEXT: and x9, x1, #0x1fffffffff +; CHECK-NEXT: msub x8, x13, x10, x8 +; CHECK-NEXT: msub x10, x11, x10, x12 +; CHECK-NEXT: lsl x13, x0, x8 +; CHECK-NEXT: lsr x9, x9, x10 +; CHECK-NEXT: orr x9, x13, x9 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: csel x0, x0, x9, eq +; CHECK-NEXT: ret + %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 + +declare i7 @llvm.fshl.i7(i7, i7, i7) +define i7 @fshl_i7_const_fold() { +; CHECK-LABEL: fshl_i7_const_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #67 +; CHECK-NEXT: ret + %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshl_i8_const_fold_overshift_1() { +; 
CHECK-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x80 +; CHECK-NEXT: ret + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_2: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x78 +; CHECK-NEXT: ret + %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: extr w0, w0, w1, #23 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. + +define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_overshift: +; CHECK: // %bb.0: +; CHECK-NEXT: extr w0, w0, w1, #23 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. + +define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-LABEL: fshl_i64_const_overshift: +; CHECK: // %bb.0: +; CHECK-NEXT: extr x0, x0, x1, #23 +; CHECK-NEXT: ret + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshl_i8_const_fold() { +; CHECK-LABEL: fshl_i8_const_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x80 +; CHECK-NEXT: ret + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +; Repeat everything for funnel shift right. + +; General case - all operands can be variables. 
+ +define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w9, wzr, #0x20 +; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: lsr w8, w1, w2 +; CHECK-NEXT: lsl w9, w0, w9 +; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: tst w2, #0x1f +; CHECK-NEXT: csel w0, w1, w8, eq +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. +declare i37 @llvm.fshr.i37(i37, i37, i37) +define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-LABEL: fshr_i37: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x11, #31883 +; CHECK-NEXT: mov w10, #37 +; CHECK-NEXT: movk x11, #3542, lsl #16 +; CHECK-NEXT: movk x11, #51366, lsl #32 +; CHECK-NEXT: sub x12, x10, x2 +; CHECK-NEXT: and x9, x2, #0x1fffffffff +; CHECK-NEXT: movk x11, #56679, lsl #48 +; CHECK-NEXT: and x12, x12, #0x1fffffffff +; CHECK-NEXT: umulh x13, x9, x11 +; CHECK-NEXT: umulh x11, x12, x11 +; CHECK-NEXT: lsr x13, x13, #5 +; CHECK-NEXT: lsr x11, x11, #5 +; CHECK-NEXT: and x8, x1, #0x1fffffffff +; CHECK-NEXT: msub x9, x13, x10, x9 +; CHECK-NEXT: msub x10, x11, x10, x12 +; CHECK-NEXT: lsr x8, x8, x9 +; CHECK-NEXT: lsl x10, x0, x10 +; CHECK-NEXT: orr x8, x10, x8 +; CHECK-NEXT: cmp x9, #0 // =0 +; CHECK-NEXT: csel x0, x1, x8, eq +; CHECK-NEXT: ret + %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 + +declare i7 @llvm.fshr.i7(i7, i7, i7) +define i7 @fshr_i7_const_fold() { +; CHECK-LABEL: fshr_i7_const_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x1f +; CHECK-NEXT: ret + %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshr_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0xfe +; CHECK-NEXT: ret + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_2() { +; 
CHECK-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #225 +; CHECK-NEXT: ret + %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0xff +; CHECK-NEXT: ret + %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: extr w0, w0, w1, #9 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. 41-32=9. + +define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_overshift: +; CHECK: // %bb.0: +; CHECK-NEXT: extr w0, w0, w1, #9 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. 105-64 = 41. + +define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-LABEL: fshr_i64_const_overshift: +; CHECK: // %bb.0: +; CHECK-NEXT: extr x0, x0, x1, #41 +; CHECK-NEXT: ret + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. 
+ +define i8 @fshr_i8_const_fold() { +; CHECK-LABEL: fshr_i8_const_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0xfe +; CHECK-NEXT: ret + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32) + ret i32 %f +} + +define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32) + ret i32 %f +} + +define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) + ret <4 x i32> %f +} + +define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) + ret <4 x i32> %f +} + diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll index b5a28a18718c..4b110baa18d4 100644 --- a/test/CodeGen/AArch64/global-merge-1.ll +++ b/test/CodeGen/AArch64/global-merge-1.ll @@ -22,10 +22,10 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals ;CHECK: .local .L_MergedGlobals -;CHECK: .comm .L_MergedGlobals,8,8 -;CHECK: m = .L_MergedGlobals -;CHECK: n = .L_MergedGlobals+4 +;CHECK: .comm .L_MergedGlobals,8,4 +;CHECK: .set m, .L_MergedGlobals +;CHECK: .set n, .L_MergedGlobals+4 -;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals -;CHECK-APPLE-IOS-NOT: _m = l__MergedGlobals 
-;CHECK-APPLE-IOS-NOT: _n = l__MergedGlobals+4 +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,2 ; @_MergedGlobals +;CHECK-APPLE-IOS-NOT: .set _m, l__MergedGlobals +;CHECK-APPLE-IOS-NOT: .set _n, l__MergedGlobals+4 diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll index 6cd3f5580438..42539767e6a4 100644 --- a/test/CodeGen/AArch64/global-merge-2.ll +++ b/test/CodeGen/AArch64/global-merge-2.ll @@ -29,24 +29,24 @@ define void @g1(i32 %a1, i32 %a2) { ;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals ;CHECK: .local .L_MergedGlobals -;CHECK: .comm .L_MergedGlobals,12,8 +;CHECK: .comm .L_MergedGlobals,12,4 ;CHECK: .globl x -;CHECK: x = .L_MergedGlobals +;CHECK: .set x, .L_MergedGlobals ;CHECK: .size x, 4 ;CHECK: .globl y -;CHECK: y = .L_MergedGlobals+4 +;CHECK: .set y, .L_MergedGlobals+4 ;CHECK: .size y, 4 ;CHECK: .globl z -;CHECK: z = .L_MergedGlobals+8 +;CHECK: .set z, .L_MergedGlobals+8 ;CHECK: .size z, 4 -;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3 +;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,2 ;CHECK-APPLE-IOS: .globl _x -;CHECK-APPLE-IOS: = __MergedGlobals_x +;CHECK-APPLE-IOS: .set {{.*}}, __MergedGlobals_x ;CHECK-APPLE-IOS: .globl _y -;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4 +;CHECK-APPLE-IOS: .set _y, __MergedGlobals_x+4 ;CHECK-APPLE-IOS: .globl _z -;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8 +;CHECK-APPLE-IOS: .set _z, __MergedGlobals_x+8 ;CHECK-APPLE-IOS: .subsections_via_symbols diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll index 6418f019f747..98d2a1723a35 100644 --- a/test/CodeGen/AArch64/global-merge-3.ll +++ b/test/CodeGen/AArch64/global-merge-3.ll @@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) { ;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE ;CHECK-APPLE-IOS-NOT: adrp ;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF -;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE 
-;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF +;CHECK-APPLE-IOS: adrp x9, _y@PAGE+12 +;CHECK-APPLE-IOS: str w1, [x9] %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3 %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3 store i32 %a1, i32* %x3, align 4 @@ -21,31 +21,23 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) { } ;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals -;CHECK: .p2align 4 +;CHECK: .p2align 2 ;CHECK: .L_MergedGlobals: ;CHECK: .size .L_MergedGlobals, 4004 -;CHECK: .type .L_MergedGlobals.1,@object // @_MergedGlobals.1 -;CHECK: .local .L_MergedGlobals.1 -;CHECK: .comm .L_MergedGlobals.1,4000,16 +;CHECK-APPLE-IOS: .zerofill __DATA,__common,_y,4000,2 -;CHECK-APPLE-IOS: .p2align 4 +;CHECK-APPLE-IOS: .p2align 2 ;CHECK-APPLE-IOS: __MergedGlobals_x: ;CHECK-APPLE-IOS: .long 1 ;CHECK-APPLE-IOS: .space 4000 -;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4 -;CHECK: z = .L_MergedGlobals +;CHECK: .set z, .L_MergedGlobals ;CHECK: .globl x -;CHECK: x = .L_MergedGlobals+4 +;CHECK: .set x, .L_MergedGlobals+4 ;CHECK: .size x, 4000 -;CHECK: .globl y -;CHECK: y = .L_MergedGlobals.1 -;CHECK: .size y, 4000 -;CHECK-APPLE-IOS-NOT: _z = __MergedGlobals_x +;CHECK-APPLE-IOS-NOT: .set _z, __MergedGlobals_x ;CHECK-APPLE-IOS:.globl _x -;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4 -;CHECK-APPLE-IOS:.globl _y -;CHECK-APPLE-IOS: _y = __MergedGlobals_y +;CHECK-APPLE-IOS:.set _x, __MergedGlobals_x+4 diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll index 036b8910d66c..b03330f5d5c8 100644 --- a/test/CodeGen/AArch64/global-merge-4.ll +++ b/test/CodeGen/AArch64/global-merge-4.ll @@ -66,7 +66,7 @@ define internal i32* @returnFoo() #1 { ;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals ;CHECK: .local .L_MergedGlobals -;CHECK: .comm .L_MergedGlobals,60,16 +;CHECK: .comm .L_MergedGlobals,60,4 attributes #0 = { nounwind ssp } attributes #1 = { nounwind readnone 
ssp } diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll index 86104b7285cf..99866c84b5ae 100644 --- a/test/CodeGen/AArch64/global-merge-group-by-use.ll +++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll @@ -88,8 +88,8 @@ define void @f5(i32 %a1) #0 { ; CHECK-DAG: .zerofill __DATA,__bss,_o5,4,2 -; CHECK-DAG: .zerofill __DATA,__bss,[[SET1]],8,3 -; CHECK-DAG: .zerofill __DATA,__bss,[[SET2]],12,3 -; CHECK-DAG: .zerofill __DATA,__bss,[[SET3]],12,3 +; CHECK-DAG: .zerofill __DATA,__bss,[[SET1]],8,2 +; CHECK-DAG: .zerofill __DATA,__bss,[[SET2]],12,2 +; CHECK-DAG: .zerofill __DATA,__bss,[[SET3]],12,2 attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll index 1c1b4f6b0452..8f5694afd5ed 100644 --- a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll +++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll @@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: f3: define void @f3(i32 %a1, i32 %a2) minsize nounwind { -; CHECK-NEXT: adrp x8, [[SET]]@PAGE -; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF -; CHECK-NEXT: stp w0, w1, [x8, #8] +; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8 +; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8 +; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret store i32 %a1, i32* @m3, align 4 store i32 %a2, i32* @n3, align 4 @@ -57,10 +57,9 @@ define void @f3(i32 %a1, i32 %a2) minsize nounwind { ; CHECK-LABEL: f4: define void @f4(i32 %a1, i32 %a2) nounwind { -; CHECK-NEXT: adrp x8, [[SET]]@PAGE -; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF +; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8 ; CHECK-NEXT: adrp x9, _n4@PAGE -; CHECK-NEXT: str w0, [x8, #8] +; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8] ; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF] ; CHECK-NEXT: ret store i32 %a1, i32* @m3, align 4 @@ -68,7 +67,7 @@ define void @f4(i32 %a1, i32 %a2) nounwind { ret void } 
-; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],16,3 +; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],16,2 ; CHECK-DAG: .zerofill __DATA,__bss,_m2,4,2 ; CHECK-DAG: .zerofill __DATA,__bss,_n2,4,2 ; CHECK-DAG: .zerofill __DATA,__bss,_n4,4,2 diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll index 97e283c972a5..be4adda59746 100644 --- a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll +++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll @@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 { ; CHECK-LABEL: f3: define void @f3(i32 %a1, i32 %a2) #0 { -; CHECK-NEXT: adrp x8, [[SET]]@PAGE -; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF -; CHECK-NEXT: stp w0, w1, [x8, #12] +; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12 +; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12 +; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret store i32 %a1, i32* @m2, align 4 store i32 %a2, i32* @n2, align 4 @@ -58,7 +58,7 @@ define void @f4(i32 %a1) #0 { ret void } -; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],20,4 +; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],20,2 ; CHECK-DAG: .zerofill __DATA,__bss,_o2,4,2 attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll index 1cffbf3de052..80c83bd4823e 100644 --- a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll +++ b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s ; Function Attrs: nounwind ssp define void @test1() { diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll index 9f7dd998bc21..8bee4437f6b9 100644 --- a/test/CodeGen/AArch64/illegal-float-ops.ll +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu 
-verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-android -verify-machineinstrs -o - %s | FileCheck --check-prefix=ANDROID-AARCH64 %s @varfloat = global float 0.0 @vardouble = global double 0.0 @@ -245,3 +246,55 @@ define void @test_fmuladd(fp128 %fp128) { ret void } + +define void @test_exp_finite(double %double) #0 { + %expdouble = call double @llvm.exp.f64(double %double) + store double %expdouble, double* @vardouble + ; ANDROID-AARCH64-NOT: bl __exp_finite + ; CHECK: bl __exp_finite + + ret void +} + +define void @test_exp2_finite(double %double) #0 { + %expdouble = call double @llvm.exp2.f64(double %double) + store double %expdouble, double* @vardouble + ; ANDROID-AARCH64-NOT: bl __exp2_finite + ; CHECK: bl __exp2_finite + + ret void +} + +define void @test_log_finite(double %double) #0 { + %logdouble = call double @llvm.log.f64(double %double) + store double %logdouble, double* @vardouble + ; ANDROID-AARCH64-NOT: bl __log_finite + ; CHECK: bl __log_finite + ret void +} + +define void @test_log2_finite(double %double) #0 { + %log2double = call double @llvm.log2.f64(double %double) + store double %log2double, double* @vardouble + ; ANDROID-AARCH64-NOT: bl __log2_finite + ; CHECK: bl __log2_finite + ret void +} + +define void @test_log10_finite(double %double) #0 { + %log10double = call double @llvm.log10.f64(double %double) + store double %log10double, double* @vardouble + ; ANDROID-AARCH64-NOT: bl __log10_finite + ; CHECK: bl __log10_finite + ret void +} + +define void @test_pow_finite(double %double) #0 { + %powdouble = call double @llvm.pow.f64(double %double, double %double) + store double %powdouble, double* @vardouble + ; ANDROID-AARCH64-NOT: bl __pow_finite + ; CHECK: bl __pow_finite + ret void +} + +attributes #0 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" } diff --git a/test/CodeGen/AArch64/inlineasm-S-constraint.ll b/test/CodeGen/AArch64/inlineasm-S-constraint.ll new file mode 100644 index 
000000000000..3fb2a3f32cea --- /dev/null +++ b/test/CodeGen/AArch64/inlineasm-S-constraint.ll @@ -0,0 +1,20 @@ +;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +@var = global i32 0 +define void @test_inline_constraint_S() { +; CHECK-LABEL: test_inline_constraint_S: + call void asm sideeffect "adrp x0, $0", "S"(i32* @var) + call void asm sideeffect "add x0, x0, :lo12:$0", "S"(i32* @var) +; CHECK: adrp x0, var +; CHECK: add x0, x0, :lo12:var + ret void +} +define i32 @test_inline_constraint_S_label(i1 %in) { +; CHECK-LABEL: test_inline_constraint_S_label: + call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc)) +; CHECK: adr x0, .Ltmp{{[0-9]+}} +br i1 %in, label %loc, label %loc2 +loc: + ret i32 0 +loc2: + ret i32 42 +} diff --git a/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll b/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll new file mode 100644 index 000000000000..be2dffde8243 --- /dev/null +++ b/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=38149 + +; We are truncating from wider width, and then sign-extending +; back to the original width. Then we inequality-comparing orig and src. +; If they don't match, then we had signed truncation during truncation. + +; This can be expressed in a several ways in IR: +; trunc + sext + icmp ne <- not canonical +; shl + ashr + icmp ne +; add + icmp ult +; add + icmp uge/ugt +; However only the simplest form (with two shifts) gets lowered best. 
+ +; ---------------------------------------------------------------------------- ; +; shl + ashr + icmp ne +; ---------------------------------------------------------------------------- ; + +define i1 @shifts_necmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: shifts_necmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = shl i16 %x, 8 ; 16-8 + %tmp1 = ashr exact i16 %tmp0, 8 ; 16-8 + %tmp2 = icmp ne i16 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_necmp_i32_i16(i32 %x) nounwind { +; CHECK-LABEL: shifts_necmp_i32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = shl i32 %x, 16 ; 32-16 + %tmp1 = ashr exact i32 %tmp0, 16 ; 32-16 + %tmp2 = icmp ne i32 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_necmp_i32_i8(i32 %x) nounwind { +; CHECK-LABEL: shifts_necmp_i32_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = shl i32 %x, 24 ; 32-8 + %tmp1 = ashr exact i32 %tmp0, 24 ; 32-8 + %tmp2 = icmp ne i32 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_necmp_i64_i32(i64 %x) nounwind { +; CHECK-LABEL: shifts_necmp_i64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = shl i64 %x, 32 ; 64-32 + %tmp1 = ashr exact i64 %tmp0, 32 ; 64-32 + %tmp2 = icmp ne i64 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_necmp_i64_i16(i64 %x) nounwind { +; CHECK-LABEL: shifts_necmp_i64_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = shl i64 %x, 48 ; 64-16 + %tmp1 = ashr exact i64 %tmp0, 48 ; 64-16 + %tmp2 = icmp ne i64 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_necmp_i64_i8(i64 %x) nounwind { +; CHECK-LABEL: shifts_necmp_i64_i8: +; 
CHECK: // %bb.0: +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = shl i64 %x, 56 ; 64-8 + %tmp1 = ashr exact i64 %tmp0, 56 ; 64-8 + %tmp2 = icmp ne i64 %tmp1, %x + ret i1 %tmp2 +} + +; ---------------------------------------------------------------------------- ; +; add + icmp ult +; ---------------------------------------------------------------------------- ; + +define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #128 // =128 +; CHECK-NEXT: ubfx w8, w8, #8, #8 +; CHECK-NEXT: cmp w8, #255 // =255 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, -128 ; ~0U << (8-1) + %tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #8, lsl #12 // =32768 +; CHECK-NEXT: cmn w8, #16, lsl #12 // =65536 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i32 %x, -32768 ; ~0U << (16-1) + %tmp1 = icmp ult i32 %tmp0, -65536 ; ~0U << 16 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i32_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #128 // =128 +; CHECK-NEXT: cmn w8, #256 // =256 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i32 %x, -128 ; ~0U << (8-1) + %tmp1 = icmp ult i32 %tmp0, -256 ; ~0U << 8 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-2147483648 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: mov x9, #-4294967296 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1) + %tmp1 = icmp ult i64 %tmp0, -4294967296 ; ~0U << 32 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i64_i16: +; CHECK: // %bb.0: 
+; CHECK-NEXT: sub x8, x0, #8, lsl #12 // =32768 +; CHECK-NEXT: cmn x8, #16, lsl #12 // =65536 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i64 %x, -32768 ; ~0U << (16-1) + %tmp1 = icmp ult i64 %tmp0, -65536 ; ~0U << 16 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i64_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #128 // =128 +; CHECK-NEXT: cmn x8, #256 // =256 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i64 %x, -128 ; ~0U << (8-1) + %tmp1 = icmp ult i64 %tmp0, -256 ; ~0U << 8 + ret i1 %tmp1 +} + +; ---------------------------------------------------------------------------- ; +; add + icmp uge +; ---------------------------------------------------------------------------- ; + +define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i32 %x, 32768 ; 1U << (16-1) + %tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i32_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i32 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) + 
%tmp1 = icmp uge i64 %tmp0, 4294967296 ; 1U << 32 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i64_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i64 %x, 32768 ; 1U << (16-1) + %tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i64_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i64 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Slightly more canonical variant +define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ugtcmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ugt i16 %tmp0, 255 ; (1U << 8) - 1 + ret i1 %tmp1 +} + +; Negative tests +; ---------------------------------------------------------------------------- ; + +; Adding not a constant +define i1 @add_ugecmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i16_i8_add: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #255 // =255 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, %y + %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Comparing not with a constant +define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i16_i8_cmp: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w1, uxth +; CHECK-NEXT: cset w0, hs +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i16 %tmp0, %y + ret i1 %tmp1 +} + +; Second constant 
is not larger than the first one +define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i8_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #127 // =127 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i16 %tmp0, 128 ; 1U << (8-1) + ret i1 %tmp1 +} + +; First constant is not power of two +define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #192 // =192 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #255 // =255 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1)) + %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Second constant is not power of two +define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #767 // =767 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i16 %tmp0, 768 ; (1U << 8)) + (1U << (8+1)) + ret i1 %tmp1 +} + +; Magic check fails, 64 << 1 != 256 +define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i16_i8_magic: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #64 // =64 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #255 // =255 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 64 ; 1U << (8-1-1) + %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Bad 'destination type' +define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i16_i4: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #8 // =8 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #15 // =15 
+; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 8 ; 1U << (4-1) + %tmp1 = icmp uge i16 %tmp0, 16 ; 1U << 4 + ret i1 %tmp1 +} + +; Bad storage type +define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind { +; CHECK-LABEL: add_ugecmp_bad_i24_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffffff +; CHECK-NEXT: cmp w8, #255 // =255 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i24 %x, 128 ; 1U << (8-1) + %tmp1 = icmp uge i24 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Slightly more canonical variant +define i1 @add_ugtcmp_bad_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ugtcmp_bad_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ugt i16 %tmp0, -1 ; when we +1 it, it will wrap to 0 + ret i1 %tmp1 +} diff --git a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll index 35117a147eeb..951bd4ada3c9 100644 --- a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll +++ b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll @@ -115,11 +115,11 @@ entry: %C = getelementptr inbounds [12 x i8], [12 x i8]* %a2, i64 0, i64 4 %1 = bitcast i8* %C to i64* store i64 0, i64* %1, align 4 - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 8, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/ldst-miflags.mir b/test/CodeGen/AArch64/ldst-miflags.mir new file mode 100644 index 000000000000..73ca0cae9f47 --- /dev/null +++ b/test/CodeGen/AArch64/ldst-miflags.mir @@ -0,0 +1,99 @@ +# RUN: llc -run-pass=aarch64-ldst-opt -o - -mtriple=aarch64-- %s | FileCheck %s +# Check that we merge the MIFlags from both the instructions in the final +# 
instruction. +--- +name: case11 +# CHECK-LABEL: name: case11 +body: | + bb.0: + frame-setup STRWui $w1, $x0, 1 :: (store 4) + $w1 = frame-destroy LDRWui $x0, 1 :: (load 4) + + ; CHECK: frame-setup STRWui + ; CHECK-NOT: frame-setup + ; CHECK-NEXT: frame-destroy ORRWrs + ; No merging happening here, make sure we keep the flags of the previous + ; instruction. + RET_ReallyLR + +... +--- +name: case12 +# CHECK-LABEL: name: case12 +body: | + bb.0: + frame-setup STRWui $w1, $x0, 1 :: (store 4) + $w2 = frame-destroy LDRHHui $x0, 2 :: (load 2) + + ; CHECK: frame-setup STRWui + ; CHECK-NOT: frame-setup + ; CHECK-NEXT: frame-destroy ANDWri + ; No merging happening here, make sure we keep the flags of the previous + ; instruction. + RET_ReallyLR + +... +--- +name: case13 +# CHECK-LABEL: name: case13 +body: | + bb.0: + frame-setup STRWui $w1, $x0, 1 :: (store 4) + $w2 = frame-destroy LDRHHui $x0, 3 :: (load 2) + + ; CHECK: frame-setup STRWui + ; CHECK-NOT: frame-setup + ; CHECK-NEXT: frame-destroy UBFMWri + ; No merging happening here, make sure we keep the flags of the previous + ; instruction. + RET_ReallyLR + +... +--- +name: case2 +# CHECK-LABEL: name: case2 +body: | + bb.0: + frame-setup STRHHui $wzr, $x0, 0 :: (store 4) + frame-destroy STRHHui $wzr, $x0, 1 :: (store 4) + + ; CHECK: frame-setup frame-destroy STRWui + RET_ReallyLR + +... +--- +name: case3 +# CHECK-LABEL: name: case3 +body: | + bb.0: + + $x0 = frame-setup LDRXui $x2, 0 :: (load 8) + $x1 = frame-destroy LDRXui $x2, 1 :: (load 8) + + ; CHECK: frame-setup frame-destroy LDPXi + RET_ReallyLR +... +--- +name: case4 +# CHECK-LABEL: name: case4 +body: | + bb.0: + $x26, $x25 = frame-setup LDPXi $sp, 0 + $sp = frame-destroy ADDXri $sp, 64, 0 + + ; CHECK: = frame-setup frame-destroy LDPXpost + RET_ReallyLR + +... 
+--- +name: case41 +# CHECK-LABEL: name: case41 +body: | + bb.0: + $x26 = frame-setup LDRXui $sp, 0 + $sp = frame-destroy ADDXri $sp, 64, 0 + + ; CHECK: = frame-setup frame-destroy LDRXpost + RET_ReallyLR + +... diff --git a/test/CodeGen/AArch64/ldst-opt-aa.mir b/test/CodeGen/AArch64/ldst-opt-aa.mir index 808926ae3cd1..a7a47278a4e9 100644 --- a/test/CodeGen/AArch64/ldst-opt-aa.mir +++ b/test/CodeGen/AArch64/ldst-opt-aa.mir @@ -14,17 +14,17 @@ ... --- # CHECK-LABEL: name: ldr_str_aa -# CHECK: %w8, %w9 = LDPWi %x1, 0 -# CHECK: STPWi %w8, %w9, %x0, 0 +# CHECK: $w8, $w9 = LDPWi $x1, 0 +# CHECK: STPWi $w8, $w9, $x0, 0 name: ldr_str_aa tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1 + liveins: $x0, $x1 - %w8 = LDRWui %x1, 0 :: (load 4 from %ir.y) - STRWui killed %w8, %x0, 0 :: (store 4 into %ir.x) - %w9 = LDRWui killed %x1, 1 :: (load 4 from %ir.arrayidx2) - STRWui killed %w9, killed %x0, 1 :: (store 4 into %ir.arrayidx3) - RET undef %lr + $w8 = LDRWui $x1, 0 :: (load 4 from %ir.y) + STRWui killed $w8, $x0, 0 :: (store 4 into %ir.x) + $w9 = LDRWui killed $x1, 1 :: (load 4 from %ir.arrayidx2) + STRWui killed $w9, killed $x0, 1 :: (store 4 into %ir.arrayidx3) + RET undef $lr diff --git a/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir b/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir index 75ad849e4f36..f42711cbc8fc 100644 --- a/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir +++ b/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir @@ -10,17 +10,17 @@ # schedulers reordering instructions such that SUBS doesn't appear # between stores. 
# CHECK-LABEL: name: no-clobber-zr -# CHECK: STPXi %xzr, %xzr, %x0, 0 +# CHECK: STPXi $xzr, $xzr, $x0, 0 name: no-clobber-zr body: | bb.0: - liveins: %x0, %x1 - STRXui %xzr, %x0, 0 :: (store 8 into %ir.p) - dead %xzr = SUBSXri killed %x1, 0, 0, implicit-def %nzcv - %w8 = CSINCWr %wzr, %wzr, 1, implicit killed %nzcv - STRXui %xzr, killed %x0, 1 :: (store 8 into %ir.p) - %w0 = ORRWrs %wzr, killed %w8, 0 - RET %lr, implicit %w0 + liveins: $x0, $x1 + STRXui $xzr, $x0, 0 :: (store 8 into %ir.p) + dead $xzr = SUBSXri killed $x1, 0, 0, implicit-def $nzcv + $w8 = CSINCWr $wzr, $wzr, 1, implicit killed $nzcv + STRXui $xzr, killed $x0, 1 :: (store 8 into %ir.p) + $w0 = ORRWrs $wzr, killed $w8, 0 + RET $lr, implicit $w0 ... diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index e416dcb0f16a..ae3f59ee8f5d 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1671,7 +1671,7 @@ entry: ; CHECK-LABEL: bug34674: ; CHECK: // %entry ; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr -; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0] +; CHECK-DAG: stp xzr, xzr, [x0] ; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1 define i64 @bug34674(<2 x i64>* %p) { entry: diff --git a/test/CodeGen/AArch64/ldst-opt.mir b/test/CodeGen/AArch64/ldst-opt.mir index 9cb9528cc62e..0d583d06434a 100644 --- a/test/CodeGen/AArch64/ldst-opt.mir +++ b/test/CodeGen/AArch64/ldst-opt.mir @@ -4,9 +4,9 @@ name: promote-load-from-store tracksRegLiveness: true body: | bb.0: - liveins: %w1, %x0, %lr + liveins: $w1, $x0, $lr - STRWui killed %w1, %x0, 0 :: (store 4) + STRWui killed $w1, $x0, 0 :: (store 4) CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 @@ -27,22 +27,22 @@ body: | CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 - %w0 = LDRHHui killed %x0, 1 :: (load 2) - RET %lr, implicit %w0 + $w0 = LDRHHui killed $x0, 1 :: (load 2) + RET $lr, implicit $w0 ... # Don't count transient instructions towards search limits. 
# CHECK-LABEL: name: promote-load-from-store -# CHECK: STRWui %w1 -# CHECK: UBFMWri killed %w1 +# CHECK: STRWui $w1 +# CHECK: UBFMWri killed $w1 --- name: store-pair tracksRegLiveness: true body: | bb.0: - liveins: %w1, %x0, %lr + liveins: $w1, $x0, $lr - STRWui %w1, %x0, 0 :: (store 4) + STRWui $w1, $x0, 0 :: (store 4) CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 @@ -63,8 +63,8 @@ body: | CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 - STRWui killed %w1, killed %x0, 1 :: (store 4) - RET %lr + STRWui killed $w1, killed $x0, 1 :: (store 4) + RET $lr ... # CHECK-LABEL: name: store-pair @@ -74,110 +74,110 @@ name: store-pair-clearkill0 tracksRegLiveness: true body: | bb.0: - liveins: %w1, %x0, %lr + liveins: $w1, $x0, $lr - STRWui %w1, %x0, 0 :: (store 4) - %w2 = COPY %w1 - %x3 = COPY %x0 - STRWui killed %w1, killed %x0, 1 :: (store 4) - RET %lr + STRWui $w1, $x0, 0 :: (store 4) + $w2 = COPY $w1 + $x3 = COPY $x0 + STRWui killed $w1, killed $x0, 1 :: (store 4) + RET $lr ... # When merging a lower store with an upper one, we must clear kill flags on # the lower store. 
# CHECK-LABEL: store-pair-clearkill0 -# CHECK-NOT: STPWi %w1, killed %w1, %x0, 0 :: (store 4) -# CHECK: STPWi %w1, %w1, %x0, 0 :: (store 4) -# CHECK: %w2 = COPY %w1 -# CHECK: RET %lr +# CHECK-NOT: STPWi $w1, killed $w1, $x0, 0 :: (store 4) +# CHECK: STPWi $w1, $w1, $x0, 0 :: (store 4) +# CHECK: $w2 = COPY $w1 +# CHECK: RET $lr --- name: store-pair-clearkill1 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %lr + liveins: $x0, $lr - %w1 = MOVi32imm 13 - %w2 = MOVi32imm 7 - STRWui %w1, %x0, 1 :: (store 4) - %w2 = COPY killed %w1 - STRWui killed %w2, %x0, 0 :: (store 4) + $w1 = MOVi32imm 13 + $w2 = MOVi32imm 7 + STRWui $w1, $x0, 1 :: (store 4) + $w2 = COPY killed $w1 + STRWui killed $w2, $x0, 0 :: (store 4) - %w1 = MOVi32imm 42 - %w2 = MOVi32imm 7 - STRWui %w1, %x0, 0 :: (store 4) - %w2 = COPY killed %w1 - STRWui killed %w2, killed %x0, 1 :: (store 4) + $w1 = MOVi32imm 42 + $w2 = MOVi32imm 7 + STRWui $w1, $x0, 0 :: (store 4) + $w2 = COPY killed $w1 + STRWui killed $w2, killed $x0, 1 :: (store 4) - RET %lr + RET $lr ... # When merging an upper store with a lower one, kill flags along the way need -# to be removed; In this case the kill flag on %w1. +# to be removed; In this case the kill flag on $w1. 
# CHECK-LABEL: store-pair-clearkill1 -# CHECK: %w1 = MOVi32imm -# CHECK: %w2 = MOVi32imm -# CHECK-NOT: %w2 = COPY killed %w1 -# CHECK: %w2 = COPY %w1 -# CHECK: STPWi killed %w2, %w1, %x0, 0 +# CHECK: $w1 = MOVi32imm +# CHECK: $w2 = MOVi32imm +# CHECK-NOT: $w2 = COPY killed $w1 +# CHECK: $w2 = COPY $w1 +# CHECK: STPWi killed $w2, $w1, $x0, 0 -# CHECK: %w1 = MOVi32imm -# CHECK: %w2 = MOVi32imm -# CHECK-NOT: %w2 = COPY killed %w1 -# CHECK: %w2 = COPY %w1 -# CHECK: STPWi %w1, killed %w2, killed %x0, 0 +# CHECK: $w1 = MOVi32imm +# CHECK: $w2 = MOVi32imm +# CHECK-NOT: $w2 = COPY killed $w1 +# CHECK: $w2 = COPY $w1 +# CHECK: STPWi $w1, killed $w2, killed $x0, 0 --- name: store-load-clearkill tracksRegLiveness: true body: | bb.0: - liveins: %w1 + liveins: $w1 - STRWui %w1, %sp, 0 :: (store 4) - %wzr = COPY killed %w1 ; killing use of %w1 - %w11 = LDRWui %sp, 0 :: (load 4) - HINT 0, implicit %w11 ; some use of %w11 + STRWui $w1, $sp, 0 :: (store 4) + $wzr = COPY killed $w1 ; killing use of $w1 + $w11 = LDRWui $sp, 0 :: (load 4) + HINT 0, implicit $w11 ; some use of $w11 ... # When replaceing the load of a store-load pair with a copy the kill flags # along the way need to be cleared. 
# CHECK-LABEL: name: store-load-clearkill -# CHECK: STRWui %w1, %sp, 0 :: (store 4) -# CHECK-NOT: COPY killed %w1 -# CHECK: %wzr = COPY %w1 -# CHECK: %w11 = ORRWrs %wzr, %w1, 0 -# CHECK: HINT 0, implicit %w11 +# CHECK: STRWui $w1, $sp, 0 :: (store 4) +# CHECK-NOT: COPY killed $w1 +# CHECK: $wzr = COPY $w1 +# CHECK: $w11 = ORRWrs $wzr, $w1, 0 +# CHECK: HINT 0, implicit $w11 --- name: promote-load-from-store-undef tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x2, %lr + liveins: $x0, $x2, $lr - STRWui undef %w1, %x0, 0 :: (store 4) - %w0 = LDRBBui %x0, 1 :: (load 2) - STRHHui undef %w3, %x2, 0 :: (store 4) - %w1 = LDRBBui %x2, 0 :: (load 4) - RET %lr, implicit %w0 + STRWui undef $w1, $x0, 0 :: (store 4) + $w0 = LDRBBui $x0, 1 :: (load 2) + STRHHui undef $w3, $x2, 0 :: (store 4) + $w1 = LDRBBui $x2, 0 :: (load 4) + RET $lr, implicit $w0 ... # CHECK-LABEL: name: promote-load-from-store-undef -# CHECK: STRWui undef %w1 -# CHECK: UBFMWri undef %w1 -# CHECK: STRHHui undef %w3 -# CHECK: ANDWri undef %w3 +# CHECK: STRWui undef $w1 +# CHECK: UBFMWri undef $w1 +# CHECK: STRHHui undef $w3 +# CHECK: ANDWri undef $w3 --- name: promote-load-from-store-trivial-kills tracksRegLiveness: true body: | bb.0: - liveins: %x0, %lr + liveins: $x0, $lr - STRXui %x0, %sp, 0 :: (store 8) - STRXui killed %x0, %sp, 2 :: (store 8) - %x0 = LDRXui %sp, 0 :: (load 8) - BL $bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp - RET %lr + STRXui $x0, $sp, 0 :: (store 8) + STRXui killed $x0, $sp, 2 :: (store 8) + $x0 = LDRXui $sp, 0 :: (load 8) + BL &bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit-def $sp + RET $lr ... 
# CHECK-LABEL: name: promote-load-from-store-trivial-kills -# CHECK: STRXui %x0, %sp, 0 -# CHECK: STRXui %x0, %sp, 2 +# CHECK: STRXui $x0, $sp, 0 +# CHECK: STRXui $x0, $sp, 2 # CHECK-NOT: LDRXui # CHECK-NOT: ORR -# CHECK: BL $bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp +# CHECK: BL &bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit-def $sp diff --git a/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/test/CodeGen/AArch64/ldst-paired-aliasing.ll index 9b0b51d369a3..0f8ffb50c8d9 100644 --- a/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -5,15 +5,16 @@ target triple = "aarch64--linux-gnu" declare void @f(i8*, i8*) declare void @f2(i8*, i8*) declare void @_Z5setupv() -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3 define i32 @main() local_unnamed_addr #1 { ; Make sure the stores happen in the correct order (the exact instructions could change). 
; CHECK-LABEL: main: -; CHECK: str xzr, [sp, #80] +; CHECK: stp xzr, xzr, [sp, #72] ; CHECK: str w9, [sp, #80] -; CHECK: stp q0, q0, [sp, #48] +; CHECK: str q0, [sp, #48] ; CHECK: ldr w8, [sp, #48] +; CHECK: str q0, [sp, #64] for.body.lr.ph.i.i.i.i.i.i63: %b1 = alloca [10 x i32], align 16 @@ -23,7 +24,7 @@ for.body.lr.ph.i.i.i.i.i.i63: tail call void @_Z5setupv() %x2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 6 %x3 = bitcast i32* %x2 to i8* - call void @llvm.memset.p0i8.i64(i8* %x3, i8 0, i64 16, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %x3, i8 0, i64 16, i1 false) %arraydecay2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 0 %x4 = bitcast [10 x i32]* %b1 to <4 x i32>* store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %x4, align 16 diff --git a/test/CodeGen/AArch64/ldst-zero.ll b/test/CodeGen/AArch64/ldst-zero.ll index 7d443a631f91..0ada6fd84cbf 100644 --- a/test/CodeGen/AArch64/ldst-zero.ll +++ b/test/CodeGen/AArch64/ldst-zero.ll @@ -3,7 +3,7 @@ ; Tests to check that zero stores which are generated as STP xzr, xzr aren't ; scheduled incorrectly due to incorrect alias information -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) %struct.tree_common = type { i8*, i8*, i32 } ; Original test case which exhibited the bug @@ -14,7 +14,7 @@ define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) { ; CHECK-DAG: str xzr, [x0] entry: %0 = bitcast %struct.tree_common* %t to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false) %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2 store i32 %code, i32* %code1, align 8 %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 diff --git a/test/CodeGen/AArch64/live-interval-analysis.mir 
b/test/CodeGen/AArch64/live-interval-analysis.mir index 93dfcf507fff..8e705b9e7719 100644 --- a/test/CodeGen/AArch64/live-interval-analysis.mir +++ b/test/CodeGen/AArch64/live-interval-analysis.mir @@ -14,9 +14,9 @@ name: reserved_reg_liveness tracksRegLiveness: true body: | bb.0: - liveins: %x28 - %6 : xseqpairsclass = COPY %x28_fp - %x28_fp = COPY %6 - %x28 = COPY %x28 - %fp = COPY %fp + liveins: $x28 + %6 : xseqpairsclass = COPY $x28_fp + $x28_fp = COPY %6 + $x28 = COPY $x28 + $fp = COPY $fp ... diff --git a/test/CodeGen/AArch64/load-combine.ll b/test/CodeGen/AArch64/load-combine.ll index f0ed40357f12..b9879b555aa7 100644 --- a/test/CodeGen/AArch64/load-combine.ll +++ b/test/CodeGen/AArch64/load-combine.ll @@ -8,7 +8,7 @@ define i32 @load_i32_by_i8_unaligned(i32* %arg) { ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 - %tmp2 = load i8, i8* %tmp, align 1 + %tmp2 = load i8, i8* %tmp1, align 1 %tmp3 = zext i8 %tmp2 to i32 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 %tmp5 = load i8, i8* %tmp4, align 1 @@ -36,7 +36,7 @@ define i32 @load_i32_by_i8_aligned(i32* %arg) { ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 - %tmp2 = load i8, i8* %tmp, align 4 + %tmp2 = load i8, i8* %tmp1, align 4 %tmp3 = zext i8 %tmp2 to i32 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 %tmp5 = load i8, i8* %tmp4, align 1 diff --git a/test/CodeGen/AArch64/loh.mir b/test/CodeGen/AArch64/loh.mir index ee62c339cf08..b51194aed119 100644 --- a/test/CodeGen/AArch64/loh.mir +++ b/test/CodeGen/AArch64/loh.mir @@ -22,171 +22,171 @@ tracksRegLiveness: true body: | bb.0: ; CHECK: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g3 - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g4 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page) @g4 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %x1 
= ADRP target-flags(aarch64-page) @g2 - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page) @g2 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %x0 = ADRP target-flags(aarch64-page) @g0 - ; CHECK-NEXT: %x0 = ADRP target-flags(aarch64-page) @g1 - %x0 = ADRP target-flags(aarch64-page) @g0 - %x0 = ADRP target-flags(aarch64-page) @g1 - %x1 = ADRP target-flags(aarch64-page) @g2 - %x1 = ADRP target-flags(aarch64-page) @g3 - %x1 = ADRP target-flags(aarch64-page) @g4 + ; CHECK-NEXT: $x0 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: $x0 = ADRP target-flags(aarch64-page) @g1 + $x0 = ADRP target-flags(aarch64-page) @g0 + $x0 = ADRP target-flags(aarch64-page) @g1 + $x1 = ADRP target-flags(aarch64-page) @g2 + $x1 = ADRP target-flags(aarch64-page) @g3 + $x1 = ADRP target-flags(aarch64-page) @g4 bb.1: ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %x20 = ADRP target-flags(aarch64-page) @g0 - ; CHECK-NEXT: %x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0 + ; CHECK-NEXT: $x20 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: $x3 = ADDXri $x20, target-flags(aarch64-pageoff) @g0 ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g0 - ; CHECK-NEXT: %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0 - %x1 = ADRP target-flags(aarch64-page) @g0 - %x9 = SUBXri undef %x11, 5, 0 ; should not affect MCLOH formation - %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0 - %x20 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: $x1 = ADDXri $x1, target-flags(aarch64-pageoff) @g0 + $x1 = ADRP target-flags(aarch64-page) @g0 + $x9 = SUBXri undef $x11, 5, 0 ; should not affect MCLOH formation + $x1 = ADDXri $x1, target-flags(aarch64-pageoff) @g0, 0 + $x20 = ADRP target-flags(aarch64-page) @g0 BL @extfunc, csr_aarch64_aapcs ; should not clobber X20 - %x3 = 
ADDXri %x20, target-flags(aarch64-pageoff) @g0, 0 + $x3 = ADDXri $x20, target-flags(aarch64-pageoff) @g0, 0 bb.2: ; CHECK-NOT: MCLOH_AdrpAdd - %x9 = ADRP target-flags(aarch64-page) @g0 + $x9 = ADRP target-flags(aarch64-page) @g0 BL @extfunc, csr_aarch64_aapcs ; clobbers x9 - ; Verification requires the use of 'undef' in front of the clobbered %x9 - %x9 = ADDXri undef %x9, target-flags(aarch64-pageoff) @g0, 0 + ; Verification requires the use of 'undef' in front of the clobbered $x9 + $x9 = ADDXri undef $x9, target-flags(aarch64-pageoff) @g0, 0 bb.3: ; CHECK-NOT: MCLOH_AdrpAdd - %x10 = ADRP target-flags(aarch64-page) @g0 - HINT 0, implicit def %x10 ; clobbers x10 - %x10 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + $x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def $x10 ; clobbers x10 + $x10 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0 bb.4: ; Cannot produce a LOH for multiple users ; CHECK-NOT: MCLOH_AdrpAdd - %x10 = ADRP target-flags(aarch64-page) @g0 - HINT 0, implicit def %x10 ; clobbers x10 - %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 - %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + $x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def $x10 ; clobbers x10 + $x11 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0 + $x12 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0 bb.5: ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page) @g2 - ; CHECK-NEXT: %s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2 + ; CHECK-NEXT: $x5 = ADRP target-flags(aarch64-page) @g2 + ; CHECK-NEXT: $s6 = LDRSui $x5, target-flags(aarch64-pageoff) @g2 ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %x4 = ADRP target-flags(aarch64-page) @g2 - ; CHECK-NEXT: %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2 - %x4 = ADRP target-flags(aarch64-page) @g2 - %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2 - %x5 = ADRP target-flags(aarch64-page) @g2 - %s6 = LDRSui %x5, 
target-flags(aarch64-pageoff) @g2 + ; CHECK-NEXT: $x4 = ADRP target-flags(aarch64-page) @g2 + ; CHECK-NEXT: $x4 = LDRXui $x4, target-flags(aarch64-pageoff) @g2 + $x4 = ADRP target-flags(aarch64-page) @g2 + $x4 = LDRXui $x4, target-flags(aarch64-pageoff) @g2 + $x5 = ADRP target-flags(aarch64-page) @g2 + $s6 = LDRSui $x5, target-flags(aarch64-pageoff) @g2 bb.6: ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: - ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 - ; CHECK-NEXT: %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2 + ; CHECK-NEXT: $x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + ; CHECK-NEXT: $x6 = LDRXui $x5, target-flags(aarch64-pageoff, aarch64-got) @g2 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: - ; CHECK-NEXT: %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 - ; CHECK-NEXT: %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 - %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 - %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 - %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 - %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2 + ; CHECK-NEXT: $x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + ; CHECK-NEXT: $x4 = LDRXui $x4, target-flags(aarch64-pageoff, aarch64-got) @g2 + $x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + $x4 = LDRXui $x4, target-flags(aarch64-pageoff, aarch64-got) @g2 + $x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + $x6 = LDRXui $x5, target-flags(aarch64-pageoff, aarch64-got) @g2 bb.7: ; CHECK-NOT: Adding MCLOH_AdrpLdrGot: ; Loading a float value from a GOT table makes no sense so this should not ; produce an LOH. 
- %x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5 - %s11 = LDRSui %x11, target-flags(aarch64-pageoff, aarch64-got) @g5 + $x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5 + $s11 = LDRSui $x11, target-flags(aarch64-pageoff, aarch64-got) @g5 bb.8: ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr: - ; CHECK-NEXT: %x7 = ADRP target-flags(aarch64-page) @g3 - ; CHECK-NEXT: %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3 - ; CHECK-NEXT: %d1 = LDRDui %x8, 8 - %x7 = ADRP target-flags(aarch64-page) @g3 - %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0 - %d1 = LDRDui %x8, 8 + ; CHECK-NEXT: $x7 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x8 = ADDXri $x7, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: $d1 = LDRDui $x8, 8 + $x7 = ADRP target-flags(aarch64-page) @g3 + $x8 = ADDXri $x7, target-flags(aarch64-pageoff) @g3, 0 + $d1 = LDRDui $x8, 8 bb.9: ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %x3 = ADRP target-flags(aarch64-page) @g3 - ; CHECK-NEXT: %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: $x3 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x3 = ADDXri $x3, target-flags(aarch64-pageoff) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page) @g3 - ; CHECK-NEXT: %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: $x5 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x2 = ADDXri $x5, target-flags(aarch64-pageoff) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpAddStr: - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g3 - ; CHECK-NEXT: %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3 - ; CHECK-NEXT: STRXui %xzr, %x1, 16 - %x1 = ADRP target-flags(aarch64-page) @g3 - %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0 - STRXui %xzr, %x1, 16 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x1 = ADDXri $x1, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: STRXui $xzr, $x1, 16 + $x1 = ADRP target-flags(aarch64-page) @g3 + $x1 = ADDXri 
$x1, target-flags(aarch64-pageoff) @g3, 0 + STRXui $xzr, $x1, 16 ; This sequence should just produce an AdrpAdd (not AdrpAddStr) - %x5 = ADRP target-flags(aarch64-page) @g3 - %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3, 0 - STRXui %x2, undef %x11, 16 + $x5 = ADRP target-flags(aarch64-page) @g3 + $x2 = ADDXri $x5, target-flags(aarch64-pageoff) @g3, 0 + STRXui $x2, undef $x11, 16 ; This sequence should just produce an AdrpAdd (not AdrpAddStr) - %x3 = ADRP target-flags(aarch64-page) @g3 - %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3, 0 - STRXui %x3, %x3, 16 + $x3 = ADRP target-flags(aarch64-page) @g3 + $x3 = ADDXri $x3, target-flags(aarch64-pageoff) @g3, 0 + STRXui $x3, $x3, 16 bb.10: ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %x2 = ADRP target-flags(aarch64-page) @g3 - ; CHECK-NEXT: %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: $x2 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: $x2 = LDRXui $x2, target-flags(aarch64-pageoff) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr: - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - ; CHECK-NEXT: %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 - ; CHECK-NEXT: %x1 = LDRXui %x1, 24 - %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 - %x1 = LDRXui %x1, 24 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + ; CHECK-NEXT: $x1 = LDRXui $x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + ; CHECK-NEXT: $x1 = LDRXui $x1, 24 + $x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + $x1 = LDRXui $x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + $x1 = LDRXui $x1, 24 ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotLdr) - %x2 = ADRP target-flags(aarch64-page) @g3 - %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3 - %x2 = LDRXui %x2, 24 + $x2 = ADRP target-flags(aarch64-page) @g3 + $x2 = LDRXui $x2, target-flags(aarch64-pageoff) @g3 + $x2 = 
LDRXui $x2, 24 bb.11: ; CHECK-NEXT: Adding MCLOH_AdrpLdr - ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page) @g1 - ; CHECK-NEXT: %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1 + ; CHECK-NEXT: $x5 = ADRP target-flags(aarch64-page) @g1 + ; CHECK-NEXT: $x5 = LDRXui $x5, target-flags(aarch64-pageoff) @g1 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr: - ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - ; CHECK-NEXT: %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 - ; CHECK-NEXT: STRXui %xzr, %x1, 32 - %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 - STRXui %xzr, %x1, 32 + ; CHECK-NEXT: $x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + ; CHECK-NEXT: $x1 = LDRXui $x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + ; CHECK-NEXT: STRXui $xzr, $x1, 32 + $x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + $x1 = LDRXui $x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + STRXui $xzr, $x1, 32 ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotStr) - %x5 = ADRP target-flags(aarch64-page) @g1 - %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1 - STRXui undef %x11, %x5, 32 + $x5 = ADRP target-flags(aarch64-page) @g1 + $x5 = LDRXui $x5, target-flags(aarch64-pageoff) @g1 + STRXui undef $x11, $x5, 32 bb.12: ; CHECK-NOT: MCLOH_AdrpAdrp ; CHECK: Adding MCLOH_AdrpAddLdr - ; %x9 = ADRP @g4 - ; %x9 = ADDXri %x9, @g4 - ; %x5 = LDRXui %x9, 0 - %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - %x9 = ADDXri %x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0 - %x5 = LDRXui %x9, 0 - %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g5 + ; $x9 = ADRP @g4 + ; $x9 = ADDXri $x9, @g4 + ; $x5 = LDRXui $x9, 0 + $x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + $x9 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0 + $x5 = LDRXui $x9, 0 + $x9 = ADRP target-flags(aarch64-page, aarch64-got) @g5 bb.13: ; Cannot produce a LOH for 
multiple users ; CHECK-NOT: MCLOH_AdrpAdd - %x10 = ADRP target-flags(aarch64-page) @g0 - %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + $x10 = ADRP target-flags(aarch64-page) @g0 + $x11 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0 B %bb.14 bb.14: - liveins: %x10 - %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + liveins: $x10 + $x12 = ADDXri $x10, target-flags(aarch64-pageoff) @g0, 0 ... diff --git a/test/CodeGen/AArch64/machine-combiner-madd.ll b/test/CodeGen/AArch64/machine-combiner-madd.ll index 4efe4e9cfb01..eeeafbbfa471 100644 --- a/test/CodeGen/AArch64/machine-combiner-madd.ll +++ b/test/CodeGen/AArch64/machine-combiner-madd.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cyclone < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m1 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m2 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=kryo < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s | FileCheck %s @@ -19,7 +20,7 @@ %class.D = type { %class.basic_string.base, [4 x i8] } %class.basic_string.base = type <{ i64, i64, i32 }> @a = global %class.D* zeroinitializer, align 8 -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) define internal void @fun() section ".text.startup" { entry: %tmp.i.i = alloca %class.D, align 8 @@ -31,7 +32,7 @@ loop: %x = load %class.D*, %class.D** getelementptr inbounds (%class.D*, %class.D** @a, i64 0), align 8 %arrayidx.i.i.i = getelementptr inbounds %class.D, %class.D* %x, i64 %conv11.i.i %d = bitcast %class.D* %arrayidx.i.i.i to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %y, i8* %d, i64 24, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 nonnull %y, 
i8* align 8 %d, i64 24, i1 false) %inc.i.i = add i64 %i, 1 %cmp.i.i = icmp slt i64 %inc.i.i, 0 br i1 %cmp.i.i, label %loop, label %exit diff --git a/test/CodeGen/AArch64/machine-combiner.ll b/test/CodeGen/AArch64/machine-combiner.ll index 358315d088db..b07788fbeef4 100644 --- a/test/CodeGen/AArch64/machine-combiner.ll +++ b/test/CodeGen/AArch64/machine-combiner.ll @@ -3,7 +3,7 @@ ; Incremental updates of the instruction depths should be enough for this test ; case. ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \ -; RUN: -disable-post-ra -machine-combiner-inc-threshold=0 < %s | FileCheck %s +; RUN: -disable-post-ra -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s ; Verify that the first two adds are independent regardless of how the inputs are ; commuted. The destination registers are used as source registers for the third add. diff --git a/test/CodeGen/AArch64/machine-combiner.mir b/test/CodeGen/AArch64/machine-combiner.mir index 0f90ef70e4af..7b3904b694a5 100644 --- a/test/CodeGen/AArch64/machine-combiner.mir +++ b/test/CodeGen/AArch64/machine-combiner.mir @@ -1,6 +1,6 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -enable-unsafe-fp-math \ # RUN: -run-pass machine-combiner -machine-combiner-inc-threshold=0 \ -# RUN: -verify-machineinstrs -o - %s | FileCheck %s +# RUN: -machine-combiner-verify-pattern-order=true -verify-machineinstrs -o - %s | FileCheck %s --- # Test incremental depth updates succeed when triggered after the removal of # the first instruction in a basic block. 
@@ -22,27 +22,27 @@ body: | bb.0: successors: %bb.1, %bb.2 - %3 = COPY %w2 - %2 = COPY %w1 - %1 = COPY %w0 - %0 = COPY %d0 - %4 = SUBSWrr %1, %2, implicit-def %nzcv - Bcc 13, %bb.2, implicit %nzcv + %3 = COPY $w2 + %2 = COPY $w1 + %1 = COPY $w0 + %0 = COPY $d0 + %4 = SUBSWrr %1, %2, implicit-def $nzcv + Bcc 13, %bb.2, implicit $nzcv B %bb.1 bb.1: ; CHECK: MADDWrrr %1, %2, %3 - %5 = MADDWrrr %1, %2, %wzr + %5 = MADDWrrr %1, %2, $wzr %6 = ADDWrr %3, killed %5 %7 = SCVTFUWDri killed %6 ; CHECK: FMADDDrrr %7, %7, %0 %8 = FMULDrr %7, %7 %9 = FADDDrr %0, killed %8 - %d0 = COPY %9 - RET_ReallyLR implicit %d0 + $d0 = COPY %9 + RET_ReallyLR implicit $d0 bb.2: - %d0 = COPY %0 - RET_ReallyLR implicit %d0 + $d0 = COPY %0 + RET_ReallyLR implicit $d0 ... diff --git a/test/CodeGen/AArch64/machine-copy-remove.mir b/test/CodeGen/AArch64/machine-copy-remove.mir index 50c03ddb4037..843481d67922 100644 --- a/test/CodeGen/AArch64/machine-copy-remove.mir +++ b/test/CodeGen/AArch64/machine-copy-remove.mir @@ -2,285 +2,285 @@ --- # Check that bb.0 COPY is seen through to allow the bb.1 COPY of XZR to be removed. # CHECK-LABEL: name: test1 -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test1 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - %x0 = COPY %x1 - CBNZX %x1, %bb.2 + $x0 = COPY $x1 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Similar to test1, but with reversed COPY. 
# CHECK-LABEL: name: test2 -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test2 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - %x1 = COPY %x0 - CBNZX %x1, %bb.2 + $x1 = COPY $x0 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Similar to test1, but with a clobber that prevents removal of the XZR COPY. # CHECK-LABEL: name: test3 -# CHECK: COPY %xzr +# CHECK: COPY $xzr name: test3 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x0 = COPY %x1 - %x1 = LDRXui %x1, 0 - CBNZX %x1, %bb.2 + $x0 = COPY $x1 + $x1 = LDRXui $x1, 0 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Similar to test2, but with a clobber that prevents removal of the XZR COPY. # CHECK-LABEL: name: test4 -# CHECK: COPY %xzr +# CHECK: COPY $xzr name: test4 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x1 = COPY %x0 - %x1 = LDRXui %x1, 0 - CBNZX %x1, %bb.2 + $x1 = COPY $x0 + $x1 = LDRXui $x1, 0 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Similar to test2, but with a clobber that prevents removal of the XZR COPY. 
# CHECK-LABEL: name: test5 -# CHECK: COPY %xzr +# CHECK: COPY $xzr name: test5 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x1 = COPY %x0 - %x0 = LDRXui %x1, 0 - CBNZX %x1, %bb.2 + $x1 = COPY $x0 + $x0 = LDRXui $x1, 0 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Similar to test1, but with two levels of COPYs. # CHECK-LABEL: name: test6 -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test6 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x2 = COPY %x0 - %x1 = COPY %x2 - CBNZX %x1, %bb.2 + $x2 = COPY $x0 + $x1 = COPY $x2 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Similar to test1, but with two levels of COPYs and a clobber preventing COPY of XZR removal. # CHECK-LABEL: name: test7 -# CHECK: COPY %xzr +# CHECK: COPY $xzr name: test7 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x2 = COPY %x0 - %x0 = LDRXui %x1, 0 - %x1 = COPY %x2 - CBNZX %x1, %bb.2 + $x2 = COPY $x0 + $x0 = LDRXui $x1, 0 + $x1 = COPY $x2 + CBNZX $x1, %bb.2 bb.1: - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Check that the TargetRegs vector clobber update loop in # AArch64RedundantCopyElimination::optimizeCopy works correctly. 
# CHECK-LABEL: name: test8 -# CHECK: x0 = COPY %xzr -# CHECK: x1 = COPY %xzr +# CHECK: x0 = COPY $xzr +# CHECK: x1 = COPY $xzr name: test8 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - %x1 = COPY %x0 - CBNZX %x1, %bb.2 + $x1 = COPY $x0 + CBNZX $x1, %bb.2 bb.1: - liveins: %x0, %x2 + liveins: $x0, $x2 - %x0, %x1 = LDPXi %x2, 0 - %x0 = COPY %xzr - %x1 = COPY %xzr + $x0, $x1 = LDPXi $x2, 0 + $x0 = COPY $xzr + $x1 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Check that copy isn't removed from a block with multiple predecessors. # CHECK-LABEL: name: test9 -# CHECK: x0 = COPY %xzr +# CHECK: x0 = COPY $xzr # CHECK-NEXT: B %bb.3 name: test9 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - CBNZX %x0, %bb.2 + CBNZX $x0, %bb.2 bb.1: - liveins: %x0, %x2 + liveins: $x0, $x2 - %x0 = COPY %xzr + $x0 = COPY $xzr B %bb.3 bb.2: - liveins: %x1 + liveins: $x1 - %x0 = LDRXui %x1, 0 + $x0 = LDRXui $x1, 0 - CBNZX %x1, %bb.1 + CBNZX $x1, %bb.1 bb.3: - liveins: %x0 + liveins: $x0 - RET_ReallyLR implicit %x0 + RET_ReallyLR implicit $x0 ... # Eliminate redundant MOVi32imm 7 in bb.1 # Note: 32-bit compare/32-bit move imm # Kill marker should be removed from compare. 
# CHECK-LABEL: name: test10 -# CHECK: SUBSWri %w0, 7, 0, implicit-def %nzcv +# CHECK: SUBSWri $w0, 7, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK-NOT: MOVi32imm name: test10 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = SUBSWri killed $w0, 7, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 7 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm 7 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR @@ -289,24 +289,24 @@ body: | # Note: 64-bit compare/32-bit move imm w/implicit def # Kill marker should be removed from compare. # CHECK-LABEL: name: test11 -# CHECK: SUBSXri %x0, 7, 0, implicit-def %nzcv +# CHECK: SUBSXri $x0, 7, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK-NOT: MOVi32imm name: test11 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1 + liveins: $x0, $x1 - dead %xzr = SUBSXri killed %x0, 7, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $xzr = SUBSXri killed $x0, 7, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 7, implicit-def %x0 - STRXui killed %x0, killed %x1, 0 + $w0 = MOVi32imm 7, implicit-def $x0 + STRXui killed $x0, killed $x1, 0 bb.2: RET_ReallyLR @@ -315,24 +315,24 @@ body: | # Note: 64-bit compare/32-bit move imm # Kill marker should be removed from compare. 
# CHECK-LABEL: name: test12 -# CHECK: SUBSXri %x0, 7, 0, implicit-def %nzcv +# CHECK: SUBSXri $x0, 7, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK-NOT: MOVi32imm name: test12 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1 + liveins: $x0, $x1 - dead %xzr = SUBSXri killed %x0, 7, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $xzr = SUBSXri killed $x0, 7, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 7 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm 7 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR @@ -341,24 +341,24 @@ body: | # Note: 32-bit compare/32-bit move imm w/implicit def # Kill marker should remain on compare. # CHECK-LABEL: name: test13 -# CHECK: SUBSWri killed %w0, 7, 0, implicit-def %nzcv +# CHECK: SUBSWri killed $w0, 7, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK: MOVi32imm name: test13 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = SUBSWri killed $w0, 7, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 7, implicit-def %x0 - STRXui killed %x0, killed %x1, 0 + $w0 = MOVi32imm 7, implicit-def $x0 + STRXui killed $x0, killed $x1, 0 bb.2: RET_ReallyLR @@ -371,19 +371,19 @@ name: test14 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv - %w0 = LDRWui %x1, 0 - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = SUBSWri killed $w0, 7, 0, implicit-def $nzcv + $w0 = LDRWui $x1, 0 + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 7 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm 7 + STRWui killed $w0, 
killed $x1, 0 bb.2: RET_ReallyLR @@ -396,19 +396,19 @@ name: test15 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = SUBSWri killed $w0, 7, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1, %x2 + liveins: $x1, $x2 - %w0 = LDRWui %x1, 0 - STRWui killed %w0, killed %x2, 0 - %w0 = MOVi32imm 7 - STRWui killed %w0, killed %x1, 0 + $w0 = LDRWui $x1, 0 + STRWui killed $w0, killed $x2, 0 + $w0 = MOVi32imm 7 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR @@ -421,18 +421,18 @@ name: test16 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %wzr = SUBSWri %w0, 7, 0, implicit-def %nzcv - %w2 = COPY %w0 - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = SUBSWri $w0, 7, 0, implicit-def $nzcv + $w2 = COPY $w0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w2 = MOVi32imm 7 - STRWui killed %w2, killed %x1, 0 + $w2 = MOVi32imm 7 + STRWui killed $w2, killed $x1, 0 bb.2: RET_ReallyLR @@ -445,17 +445,17 @@ name: test17 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %w0 = SUBSWri killed %w0, 7, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $w0 = SUBSWri killed $w0, 7, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 7 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm 7 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR @@ -470,16 +470,16 @@ name: test18 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1 + liveins: $x0, $x1 - CBNZX killed %x0, %bb.2 + CBNZX killed $x0, %bb.2 B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %x0 = MOVi64imm 4252017623040 - STRXui killed %x0, killed %x1, 0 + $x0 = MOVi64imm 4252017623040 + STRXui killed $x0, killed $x1, 0 bb.2: 
RET_ReallyLR @@ -488,24 +488,24 @@ body: | # Note: 32-bit compare/32-bit move imm # Kill marker should be removed from compare. # CHECK-LABEL: name: test19 -# CHECK: ADDSWri %w0, 1, 0, implicit-def %nzcv +# CHECK: ADDSWri $w0, 1, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK-NOT: MOVi32imm name: test19 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %wzr = ADDSWri killed %w0, 1, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = ADDSWri killed $w0, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm -1 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm -1 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR @@ -514,24 +514,24 @@ body: | # Note: 64-bit compare/64-bit move imm # Kill marker should be removed from compare. # CHECK-LABEL: name: test20 -# CHECK: ADDSXri %x0, 1, 0, implicit-def %nzcv +# CHECK: ADDSXri $x0, 1, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK-NOT: MOVi64imm name: test20 tracksRegLiveness: true body: | bb.0: - liveins: %x0, %x1 + liveins: $x0, $x1 - dead %xzr = ADDSXri killed %x0, 1, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $xzr = ADDSXri killed $x0, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %x0 = MOVi64imm -1 - STRXui killed %x0, killed %x1, 0 + $x0 = MOVi64imm -1 + STRXui killed $x0, killed $x1, 0 bb.2: RET_ReallyLR @@ -540,24 +540,24 @@ body: | # Note: 64-bit compare/32-bit move imm # Kill marker should be removed from compare. 
# CHECK-LABEL: name: test21 -# CHECK: ADDSXri %x0, 1, 0, implicit-def %nzcv +# CHECK: ADDSXri $x0, 1, 0, implicit-def $nzcv # CHECK: bb.1: # CHECK-NOT: MOVi32imm name: test21 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1 + liveins: $x0, $x1 - dead %xzr = ADDSXri killed %x0, 1, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $xzr = ADDSXri killed $x0, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm -1 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm -1 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR @@ -571,17 +571,17 @@ name: test22 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %wzr = ADDSWri killed %w0, 1, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = ADDSWri killed $w0, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %x0 = MOVi64imm -1 - STRXui killed %x0, killed %x1, 0 + $x0 = MOVi64imm -1 + STRXui killed $x0, killed $x1, 0 bb.2: RET_ReallyLR @@ -594,17 +594,17 @@ name: test23 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1 + liveins: $w0, $x1 - dead %wzr = SUBSWri killed %w0, 1, 12, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + dead $wzr = SUBSWri killed $w0, 1, 12, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x1 + liveins: $x1 - %w0 = MOVi32imm 4096 - STRWui killed %w0, killed %x1, 0 + $w0 = MOVi32imm 4096 + STRWui killed $w0, killed $x1, 0 bb.2: RET_ReallyLR diff --git a/test/CodeGen/AArch64/machine-dead-copy.mir b/test/CodeGen/AArch64/machine-dead-copy.mir index cb552e5cab3d..09548f3f6083 100644 --- a/test/CodeGen/AArch64/machine-dead-copy.mir +++ b/test/CodeGen/AArch64/machine-dead-copy.mir @@ -6,35 +6,37 @@ define i32 @copyprop2(i32 %a, i32 %b) { ret i32 %a } define i32 @copyprop3(i32 %a, i32 %b) { ret i32 %a } define i32 @copyprop4(i32 
%a, i32 %b) { ret i32 %a } + define i32 @copyprop5(i32 %a, i32 %b) { ret i32 %a } + define i32 @copyprop6(i32 %a, i32 %b) { ret i32 %a } declare i32 @foo(i32) ... --- # The first copy is dead copy which is not used. # CHECK-LABEL: name: copyprop1 # CHECK: bb.0: -# CHECK-NOT: %w20 = COPY +# CHECK-NOT: $w20 = COPY name: copyprop1 body: | bb.0: - liveins: %w0, %w1 - %w20 = COPY %w1 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - RET_ReallyLR implicit %w0 + liveins: $w0, $w1 + $w20 = COPY $w1 + BL @foo, csr_aarch64_aapcs, implicit $w0, implicit-def $w0 + RET_ReallyLR implicit $w0 ... --- # The first copy is not a dead copy which is used in the second copy after the # call. # CHECK-LABEL: name: copyprop2 # CHECK: bb.0: -# CHECK: %w20 = COPY +# CHECK: $w20 = COPY name: copyprop2 body: | bb.0: - liveins: %w0, %w1 - %w20 = COPY %w1 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - %w0 = COPY %w20 - RET_ReallyLR implicit %w0 + liveins: $w0, $w1 + $w20 = COPY $w1 + BL @foo, csr_aarch64_aapcs, implicit $w0, implicit-def $w0 + $w0 = COPY $w20 + RET_ReallyLR implicit $w0 ... --- # Both the first and second copy are dead copies which are not used. @@ -44,11 +46,11 @@ body: | name: copyprop3 body: | bb.0: - liveins: %w0, %w1 - %w20 = COPY %w1 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - %w20 = COPY %w0 - RET_ReallyLR implicit %w0 + liveins: $w0, $w1 + $w20 = COPY $w1 + BL @foo, csr_aarch64_aapcs, implicit $w0, implicit-def $w0 + $w20 = COPY $w0 + RET_ReallyLR implicit $w0 ... # The second copy is removed as a NOP copy, after then the first copy become # dead which should be removed as well. 
@@ -58,10 +60,40 @@ body: | name: copyprop4 body: | bb.0: - liveins: %w0, %w1 - %w20 = COPY %w0 - %w0 = COPY %w20 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - RET_ReallyLR implicit %w0 + liveins: $w0, $w1 + $w20 = COPY $w0 + $w0 = COPY $w20 + BL @foo, csr_aarch64_aapcs, implicit $w0, implicit-def $w0 + RET_ReallyLR implicit $w0 +... + +# Don't try to erase any COPY which overlaps itself. +# CHECK-LABEL: name: copyprop5 +# CHECK: bb.0: +# CHECK: COPY killed $q26_q27_q28_q29 +# CHECK: COPY killed $q28_q29_q30_q31 +name: copyprop5 +body: | + bb.0: + liveins: $q26_q27_q28_q29 + $q28_q29_q30_q31 = COPY killed $q26_q27_q28_q29 + $q26_q27_q28_q29 = COPY killed $q28_q29_q30_q31 + BL @foo, csr_aarch64_aapcs, implicit killed $q26_q27_q28_q29 + RET_ReallyLR +... + +# Don't try to analyze any COPY which overlaps itself. +# CHECK-LABEL: name: copyprop6 +# CHECK: bb.0: +# CHECK: COPY killed $q26_q27_q28_q29 +# CHECK: $q30 = COPY $q28 +name: copyprop6 +body: | + bb.0: + liveins: $q26_q27_q28_q29 + $q28_q29_q30_q31 = COPY killed $q26_q27_q28_q29 + $q30 = COPY $q28 + BL @foo, csr_aarch64_aapcs, implicit killed $q28_q29_q30_q31 + RET_ReallyLR ... diff --git a/test/CodeGen/AArch64/machine-outliner-bad-adrp.mir b/test/CodeGen/AArch64/machine-outliner-bad-adrp.mir new file mode 100644 index 000000000000..af89ac5885eb --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-bad-adrp.mir @@ -0,0 +1,42 @@ + +# RUN: llc -mtriple=aarch64--- -verify-machineinstrs -simplify-mir -run-pass=machine-outliner %s -o - | FileCheck %s +--- | + + define void @foo() #0 { + ret void + } + + attributes #0 = { noredzone } +... 
+--- +name: foo +tracksRegLiveness: true +constants: + - id: 0 + value: 'float 1.990000e+02' + alignment: 4 + isTargetSpecific: false +body: | + bb.0: + liveins: $w1, $w10, $x14, $x15, $x16, $x10, $lr + ; CHECK-NOT: BL + + $w10 = MOVZWi 4, 0, implicit-def $x10 + + renamable $x14 = ADRP target-flags(aarch64-page) %const.0 + renamable $x15 = ADRP target-flags(aarch64-page) %const.0 + renamable $x16 = ADRP target-flags(aarch64-page) %const.0 + + $w10 = MOVZWi 5, 0, implicit-def $x10 + + renamable $x14 = ADRP target-flags(aarch64-page) %const.0 + renamable $x15 = ADRP target-flags(aarch64-page) %const.0 + renamable $x16 = ADRP target-flags(aarch64-page) %const.0 + + $w10 = MOVZWi 6, 0, implicit-def $x10 + + renamable $x14 = ADRP target-flags(aarch64-page) %const.0 + renamable $x15 = ADRP target-flags(aarch64-page) %const.0 + renamable $x16 = ADRP target-flags(aarch64-page) %const.0 + + RET undef $lr diff --git a/test/CodeGen/AArch64/machine-outliner-bad-register.mir b/test/CodeGen/AArch64/machine-outliner-bad-register.mir new file mode 100644 index 000000000000..500f73a6bd58 --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-bad-register.mir @@ -0,0 +1,183 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +# Ensure that we don't outline from regions where x16, x17, or nzcv are live +# across the outlining candidate. These values are allowed to be clobbered by, +# say, the linker, in the presence of function calls. Thus, we can't outline +# these, since the insertion of the outlined call could change the values of +# these registers. +--- | + ; No problematic register appears at all. Safe for outlining. + define void @reg_never_defined() #0 { ret void } + + ; A problematic register is live, but after the candidate. Safe for outlining. 
+ define void @reg_defined_after_candidate() #0 { ret void } + + ; A problematic register is live before the candidate, but killed before + ; entry to the candidate. Safe for outlining. + define void @reg_killed_before_candidate() #0 { ret void } + + ; Ensure that we never outline when any of the problematic registers we care + ; about are defined across the outlining candidate. + define void @x16_live() #0 { ret void } + define void @x17_live() #0 { ret void } + define void @nzcv_live() #0 { ret void } + + ; Test a combination of the above behaviours. + ; [candidate] (1) + ; - define a bad register - + ; [candidate] (2) + ; - kill the bad register - + ; [candidate] (3) + ; + ; (1) and (3) should be outlined, while (2) should not be outlined. + define void @multiple_ranges() #0 { ret void } + + attributes #0 = { noredzone } +... +--- + +# There should be two calls to outlined functions here, since we haven't tripped +# any of the cases above. +name: reg_never_defined +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: BL + liveins: $w8, $wzr + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK: BL + liveins: $w8, $wzr + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.2: + RET undef $lr +... +--- + +name: reg_defined_after_candidate +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: BL + ; CHECK-NEXT: $x16 = ORRXri $x8, 5, implicit-def $x16, implicit-def $w16 + liveins: $w8, $wzr + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + $x16 = ORRXri $x8, 5, implicit-def $x16, implicit-def $w16 + $w8 = ORRWri $w16, 5 + RET undef $lr +... 
+--- + +name: reg_killed_before_candidate +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: BL + liveins: $w8, $wzr, $x16 + dead $x16 = ORRXri $x8, 6 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + RET undef $lr +... +--- + +name: x16_live +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $w8, $wzr, $x16 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.1: + liveins: $x16 + RET undef $lr +... +--- + +name: x17_live +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $w8, $wzr, $x17 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + $w8 = ORRWri $w17, 5 + RET undef $lr +... +--- + +name: nzcv_live +tracksRegLiveness: true +body: | + bb.0: + liveins: $w8, $wzr, $nzcv + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.1: + liveins: $nzcv + RET undef $lr +... +--- + +name: multiple_ranges +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: BL + liveins: $w8, $wzr + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + $x16 = ORRXri $x8, 5, implicit-def $x16 + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK-NOT: BL + liveins: $w8, $x16 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.2: + ; CHECK-LABEL: bb.2: + ; CHECK: BL + liveins: $w8, $x16 + dead $x16 = ORRXri $x8, 0 + $w8 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 2 + $w8 = ORRWri $wzr, 3 + $w8 = ORRWri $wzr, 4 + bb.3: + liveins: $w8 + RET undef $lr +... 
+--- diff --git a/test/CodeGen/AArch64/machine-outliner-calls.mir b/test/CodeGen/AArch64/machine-outliner-calls.mir new file mode 100644 index 000000000000..00025ab90625 --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-calls.mir @@ -0,0 +1,66 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s +--- | + define void @baz() #0 { + ret void + } + + define void @bar(i32 %a) #0 { + ret void + } + + attributes #0 = { noredzone } +... +--- + +name: bar +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $lr, $w8 + $sp = frame-setup SUBXri $sp, 32, 0 + $fp = frame-setup ADDXri $sp, 16, 0 + + bb.1: + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w0 = ORRWri $wzr, 4 + + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w0 = ORRWri $wzr, 3 + + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w0 = ORRWri $wzr, 2 + + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w0 = ORRWri $wzr, 1 + + + bb.2: + $fp, $lr = LDPXi $sp, 2 + RET undef $lr +... 
+--- +name: baz +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $lr, $w8 + RET undef $lr + +# CHECK: name: OUTLINED_FUNCTION_0 +# CHECK-DAG: bb.0: +# CHECK-DAG: frame-setup CFI_INSTRUCTION def_cfa_offset -16 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, 16 +# CHECK-NEXT: early-clobber $sp = STRXpre $lr, $sp, -16 +# CHECK-NEXT: BL @baz, implicit-def dead $lr, implicit $sp +# CHECK-NEXT: $w17 = ORRWri $wzr, 1 +# CHECK-NEXT: $w17 = ORRWri $wzr, 1 +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +# CHECK-NEXT: RET undef $lr diff --git a/test/CodeGen/AArch64/machine-outliner-flags.ll b/test/CodeGen/AArch64/machine-outliner-flags.ll new file mode 100644 index 000000000000..e00a19099cf2 --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-flags.ll @@ -0,0 +1,47 @@ +; REQUIRES: asserts +; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \ +; RUN: --debug-only=machine-outliner -enable-machine-outliner=always \ +; RUN: -mtriple arm64---- -o /dev/null 2>&1 \ +; RUN: | FileCheck %s -check-prefix=ALWAYS + +; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \ +; RUN: --debug-only=machine-outliner -enable-machine-outliner \ +; RUN: -mtriple arm64---- -o /dev/null 2>&1 \ +; RUN: | FileCheck %s -check-prefix=ENABLE + +; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \ +; RUN: -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 \ +; RUN: | FileCheck %s -check-prefix=NEVER + +; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \ +; RUN: -mtriple arm64---- -o /dev/null 2>&1 \ +; RUN: | FileCheck %s -check-prefix=NOT-ADDED + +; RUN: llc %s -O=0 -debug-pass=Structure -verify-machineinstrs \ +; RUN: -mtriple arm64---- -o /dev/null 2>&1 \ +; RUN: | FileCheck %s -check-prefix=OPTNONE + +; Make sure that the outliner is added to the pass pipeline only when the +; appropriate flags/settings are set. Make sure it isn't added otherwise. 
+; +; Cases where it should be added: +; * -enable-machine-outliner +; * -enable-machine-outliner=always +; +; Cases where it should not be added: +; * -O0 or equivalent +; * -enable-machine-outliner is not passed +; * -enable-machine-outliner=never is passed + +; ALWAYS: Machine Outliner +; ALWAYS: Machine Outliner: Running on all functions +; ENABLE: Machine Outliner +; ENABLE: Machine Outliner: Running on all functions +; NEVER-NOT: Machine Outliner +; NOT-ADDED-NOT: Machine Outliner +; OPTNONE-NOT: Machine Outliner + +define void @foo() { + ret void; +} + diff --git a/test/CodeGen/AArch64/machine-outliner-inline-asm-adrp.mir b/test/CodeGen/AArch64/machine-outliner-inline-asm-adrp.mir new file mode 100644 index 000000000000..4992c1a247a3 --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-inline-asm-adrp.mir @@ -0,0 +1,57 @@ +# RUN: llc -simplify-mir -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s +# CHECK-NOT: OUTLINED_FUNCTION +--- | + target triple = "arm64----" + + @g = external global i64, align 8 + + define void @foo() #0 { + ret void + } + + define void @foo2() #0 { + ret void + } + + define void @foo3() #0 { + ret void + } + + attributes #0 = { nounwind noredzone } + +... +--- +name: foo +alignment: 2 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $x27, $lr + $x27 = ADRP target-flags(aarch64-page, aarch64-got) @g + $lr = ADRP target-flags(aarch64-page, aarch64-got) @g + RET undef $lr + +... +--- +name: foo2 +alignment: 2 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $x27, $lr + $x27 = ADRP target-flags(aarch64-page, aarch64-got) @g + $lr = ADRP target-flags(aarch64-page, aarch64-got) @g + RET undef $lr + +... +--- +name: foo3 +alignment: 2 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $x27, $lr + $x27 = ADRP target-flags(aarch64-page, aarch64-got) @g + $lr = ADRP target-flags(aarch64-page, aarch64-got) @g + RET undef $lr +... 
diff --git a/test/CodeGen/AArch64/machine-outliner-noredzone.ll b/test/CodeGen/AArch64/machine-outliner-noredzone.ll new file mode 100644 index 000000000000..36d860a6b85e --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-noredzone.ll @@ -0,0 +1,77 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -enable-machine-outliner -aarch64-redzone %s -o - | FileCheck %s -check-prefix=REDZONE + +; Ensure that the MachineOutliner does not fire on functions which use a +; redzone. We don't care about what's actually outlined here. We just want to +; force behaviour in the outliner to make sure that it never acts on anything +; that might have a redzone. +target triple = "arm64----" + +@x = common global i32 0, align 4 +declare void @baz() #0 + +; In AArch64FrameLowering, there are a couple special early exit cases where we +; *know* we don't use a redzone. The GHC calling convention is one of these +; cases. Make sure that we know we don't have a redzone even in these cases. +define cc 10 void @bar() #0 { + ; CHECK-LABEL: bar + ; CHECK: bl OUTLINED_FUNCTION + ; REDZONE-LABEL: bar + ; REDZONE: bl OUTLINED_FUNCTION + %1 = load i32, i32* @x, align 4 + %2 = add nsw i32 %1, 1 + store i32 %2, i32* @x, align 4 + call void @baz() + %3 = load i32, i32* @x, align 4 + %4 = add nsw i32 %3, 1 + store i32 %4, i32* @x, align 4 + call void @baz() + %5 = load i32, i32* @x, align 4 + %6 = add nsw i32 %5, 1 + store i32 %6, i32* @x, align 4 + ret void +} + +; foo() should have a redzone when compiled with -aarch64-redzone, and no +; redzone otherwise. 
+define void @foo() #0 { + ; CHECK-LABEL: foo + ; CHECK: bl OUTLINED_FUNCTION + ; REDZONE-LABEL: foo + ; REDZONE-NOT: bl OUTLINED_FUNCTION + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i32 0, i32* %2, align 4 + store i32 0, i32* %3, align 4 + store i32 0, i32* %4, align 4 + %5 = load i32, i32* %1, align 4 + %6 = add nsw i32 %5, 1 + store i32 %6, i32* %1, align 4 + %7 = load i32, i32* %3, align 4 + %8 = add nsw i32 %7, 1 + store i32 %8, i32* %3, align 4 + %9 = load i32, i32* %4, align 4 + %10 = add nsw i32 %9, 1 + store i32 %10, i32* %4, align 4 + %11 = load i32, i32* %2, align 4 + %12 = add nsw i32 %11, 1 + store i32 %12, i32* %2, align 4 + %13 = load i32, i32* %1, align 4 + %14 = add nsw i32 %13, 1 + store i32 %14, i32* %1, align 4 + %15 = load i32, i32* %3, align 4 + %16 = add nsw i32 %15, 1 + store i32 %16, i32* %3, align 4 + %17 = load i32, i32* %4, align 4 + %18 = add nsw i32 %17, 1 + store i32 %18, i32* %4, align 4 + %19 = load i32, i32* %2, align 4 + %20 = add nsw i32 %19, -1 + store i32 %20, i32* %2, align 4 + ret void +} + +attributes #0 = { noinline nounwind optnone } diff --git a/test/CodeGen/AArch64/machine-outliner-remarks.ll b/test/CodeGen/AArch64/machine-outliner-remarks.ll index a5f131b5a0ca..e721b8a648a4 100644 --- a/test/CodeGen/AArch64/machine-outliner-remarks.ll +++ b/test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -1,18 +1,17 @@ ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -pass-remarks=machine-outliner -pass-remarks-missed=machine-outliner -o /dev/null 2>&1 | FileCheck %s -; CHECK: machine-outliner-remarks.ll:5:9: +; CHECK: <unknown>:0:0: ; CHECK-SAME: Did not outline 2 instructions from 2 locations. 
-; CHECK-SAME: Instructions from outlining all occurrences (9) >= -; CHECK-SAME: Unoutlined instruction count (4) -; CHECK-SAME: (Also found at: machine-outliner-remarks.ll:13:9) -; CHECK: remark: <unknown>:0:0: Saved 5 instructions by outlining 7 instructions -; CHECK-SAME: from 2 locations. (Found at: machine-outliner-remarks.ll:27:9, -; CHECK-SAME: machine-outliner-remarks.ll:36:1) +; CHECK-SAME: Bytes from outlining all occurrences (36) >= +; CHECK-SAME: Unoutlined instruction bytes (16) +; CHECK-SAME: (Also found at: <UNKNOWN LOCATION>) +; CHECK: remark: <unknown>:0:0: Saved 20 bytes by outlining 12 instructions +; CHECK-SAME: from 2 locations. (Found at: <UNKNOWN LOCATION>, +; CHECK-SAME: <UNKNOWN LOCATION>) ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml ; RUN: cat %t.yaml | FileCheck %s -check-prefix=YAML ; YAML: --- !Missed ; YAML-NEXT: Pass: machine-outliner ; YAML-NEXT: Name: NotOutliningCheaper -; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 5, Column: 9 } ; YAML-NEXT: Function: dog ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Did not outline ' @@ -21,15 +20,14 @@ ; YAML-NEXT: - String: ' from ' ; YAML-NEXT: - NumOccurrences: '2' ; YAML-NEXT: - String: ' locations.' 
-; YAML-NEXT: - String: ' Instructions from outlining all occurrences (' -; YAML-NEXT: - OutliningCost: '9' +; YAML-NEXT: - String: ' Bytes from outlining all occurrences (' +; YAML-NEXT: - OutliningCost: '36' ; YAML-NEXT: - String: ')' -; YAML-NEXT: - String: ' >= Unoutlined instruction count (' -; YAML-NEXT: - NotOutliningCost: '4' +; YAML-NEXT: - String: ' >= Unoutlined instruction bytes (' +; YAML-NEXT: - NotOutliningCost: '16' ; YAML-NEXT: - String: ')' ; YAML-NEXT: - String: ' (Also found at: ' -; YAML-NEXT: - OtherStartLoc1: 'machine-outliner-remarks.ll:13:9' -; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 13, Column: 9 } +; YAML-NEXT: - OtherStartLoc1: '<UNKNOWN LOCATION>' ; YAML-NEXT: - String: ')' ; YAML: --- !Passed ; YAML-NEXT: Pass: machine-outliner @@ -37,20 +35,18 @@ ; YAML-NEXT: Function: OUTLINED_FUNCTION_0 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Saved ' -; YAML-NEXT: - OutliningBenefit: '5' -; YAML-NEXT: - String: ' instructions by ' +; YAML-NEXT: - OutliningBenefit: '20' +; YAML-NEXT: - String: ' bytes by ' ; YAML-NEXT: - String: 'outlining ' -; YAML-NEXT: - Length: '7' +; YAML-NEXT: - Length: '12' ; YAML-NEXT: - String: ' instructions ' ; YAML-NEXT: - String: 'from ' ; YAML-NEXT: - NumOccurrences: '2' ; YAML-NEXT: - String: ' locations. 
' ; YAML-NEXT: - String: '(Found at: ' -; YAML-NEXT: - StartLoc0: 'machine-outliner-remarks.ll:27:9' -; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 27, Column: 9 } +; YAML-NEXT: - StartLoc0: '<UNKNOWN LOCATION>' ; YAML-NEXT: - String: ', ' -; YAML-NEXT: - StartLoc1: 'machine-outliner-remarks.ll:36:1' -; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 36, Column: 1 } +; YAML-NEXT: - StartLoc1: '<UNKNOWN LOCATION>' ; YAML-NEXT: - String: ')' define void @dog() #0 !dbg !8 { @@ -76,10 +72,14 @@ define void @foo() #0 !dbg !18 { %2 = alloca i32, align 4 %3 = alloca i32, align 4 %4 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - store i32 1, i32* %2, align 4, !dbg !24 - store i32 2, i32* %3, align 4 - store i32 3, i32* %4, align 4, !dbg !26 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4, !dbg !24 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4, !dbg !26 ret void } @@ -88,10 +88,14 @@ define void @bar() #0 !dbg !27 { %2 = alloca i32, align 4 %3 = alloca i32, align 4 %4 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - store i32 1, i32* %2, align 4, !dbg !33 - store i32 2, i32* %3, align 4 - store i32 3, i32* %4, align 4, !dbg !35 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4, !dbg !33 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4, !dbg !35 ret void } @@ -109,15 +113,15 @@ attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" !5 = !{i32 1, !"wchar_size", i32 4} !6 = !{i32 7, !"PIC Level", i32 2} !7 = !{!""} -!8 = distinct !DISubprogram(name: "dog", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, 
variables: !2) +!8 = distinct !DISubprogram(name: "dog", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !9 = !DISubroutineType(types: !10) !10 = !{null} !12 = !DILocation(line: 5, column: 9, scope: !8) -!14 = distinct !DISubprogram(name: "cat", scope: !1, file: !1, line: 10, type: !9, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!14 = distinct !DISubprogram(name: "cat", scope: !1, file: !1, line: 10, type: !9, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !16 = !DILocation(line: 13, column: 9, scope: !14) -!18 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 26, type: !9, isLocal: false, isDefinition: true, scopeLine: 26, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!18 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 26, type: !9, isLocal: false, isDefinition: true, scopeLine: 26, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !24 = !DILocation(line: 27, column: 9, scope: !18) !26 = !DILocation(line: 29, column: 9, scope: !18) -!27 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 35, type: !9, isLocal: false, isDefinition: true, scopeLine: 35, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!27 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 35, type: !9, isLocal: false, isDefinition: true, scopeLine: 35, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !33 = !DILocation(line: 36, column: 1, scope: !27) !35 = !DILocation(line: 38, column: 1, scope: !27) diff --git a/test/CodeGen/AArch64/machine-outliner-tail.ll b/test/CodeGen/AArch64/machine-outliner-tail.ll new file mode 100644 index 000000000000..751128c7f350 --- /dev/null +++ 
b/test/CodeGen/AArch64/machine-outliner-tail.ll @@ -0,0 +1,22 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-linux-gnu < %s | FileCheck %s + +; CHECK: OUTLINED_FUNCTION_0: +; CHECK: orr w0, wzr, #0x1 +; CHECK-NEXT: orr w1, wzr, #0x2 +; CHECK-NEXT: orr w2, wzr, #0x3 +; CHECK-NEXT: orr w3, wzr, #0x4 +; CHECK-NEXT: b z + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} diff --git a/test/CodeGen/AArch64/machine-outliner-thunk.ll b/test/CodeGen/AArch64/machine-outliner-thunk.ll new file mode 100644 index 000000000000..819c940f78b0 --- /dev/null +++ b/test/CodeGen/AArch64/machine-outliner-thunk.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-pc-linux-gnu" + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; CHECK-LABEL: a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: add w0, w0, #8 // =8 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; CHECK-LABEL: b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: add w0, w0, #88 // =88 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x1 +; CHECK-NEXT: orr w1, wzr, #0x2 +; CHECK-NEXT: orr w2, wzr, #0x3 +; CHECK-NEXT: orr w3, wzr, #0x4 +; CHECK-NEXT: b thunk_called_fn diff --git a/test/CodeGen/AArch64/machine-outliner.ll b/test/CodeGen/AArch64/machine-outliner.ll index 9b6254fb3cc1..1b45409b799a 100644 --- a/test/CodeGen/AArch64/machine-outliner.ll +++ b/test/CodeGen/AArch64/machine-outliner.ll @@ -1,64 +1,98 @@ -; RUN: llc -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=NoODR -; RUN: llc -enable-machine-outliner -enable-linkonceodr-outlining -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin -mcpu=cortex-a53 -enable-misched=false < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -enable-machine-outliner -enable-linkonceodr-outlining -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=ODR define linkonce_odr void @fish() #0 { ; CHECK-LABEL: _fish: - ; NoODR: orr w8, wzr, #0x1 - ; NoODR-NEXT: stp w8, wzr, [sp, #8] - ; NoODR-NEXT: orr w8, wzr, #0x2 - ; NoODR-NEXT: str w8, [sp, #4] - ; NoODR-NEXT: orr w8, wzr, #0x3 - ; NoODR-NEXT: str w8, [sp], #16 - ; NoODR-NEXT: ret - ; ODR: b l_OUTLINED_FUNCTION_0 + ; CHECK-NOT: OUTLINED + ; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 %4 = alloca i32, align 4 - store i32 0, 
i32* %1, align 4 - store i32 1, i32* %2, align 4 - store i32 2, i32* %3, align 4 - store i32 3, i32* %4, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @turtle() section "TURTLE,turtle" { + ; CHECK-LABEL: _turtle: + ; ODR-LABEL: _turtle: + ; CHECK-NOT: OUTLINED + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 ret void } define void @cat() #0 { ; CHECK-LABEL: _cat: - ; CHECK: b l_OUTLINED_FUNCTION_0 - ; CHECK-NOT: ret + ; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + ; ODR: [[OUTLINED]] %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 %4 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - store i32 1, i32* %2, align 4 - store i32 2, i32* %3, align 4 - store i32 3, i32* %4, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 ret void } define void @dog() #0 { ; CHECK-LABEL: _dog: - ; CHECK: b l_OUTLINED_FUNCTION_0 - ; CHECK-NOT: ret + ; CHECK: [[OUTLINED]] + ; ODR: [[OUTLINED]] %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 %4 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - store i32 1, i32* %2, align 4 - store i32 2, i32* %3, align 4 - store i32 3, i32* %4, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* 
%2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 ret void } -; CHECK-LABEL: l_OUTLINED_FUNCTION_0: -; CHECK: orr w8, wzr, #0x1 -; CHECK-NEXT: stp w8, wzr, [sp, #8] -; CHECK-NEXT: orr w8, wzr, #0x2 -; CHECK-NEXT: str w8, [sp, #4] -; CHECK-NEXT: orr w8, wzr, #0x3 -; CHECK-NEXT: str w8, [sp], #16 +; ODR: [[OUTLINED]]: +; CHECK: .p2align 2 +; CHECK-NEXT: [[OUTLINED]]: +; CHECK: orr w8, wzr, #0x1 +; CHECK-NEXT: str w8, [sp, #44] +; CHECK-NEXT: orr w8, wzr, #0x2 +; CHECK-NEXT: str w8, [sp, #40] +; CHECK-NEXT: orr w8, wzr, #0x3 +; CHECK-NEXT: str w8, [sp, #36] +; CHECK-NEXT: orr w8, wzr, #0x4 +; CHECK-NEXT: str w8, [sp, #32] +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: str w8, [sp, #28] +; CHECK-NEXT: orr w8, wzr, #0x6 +; CHECK-NEXT: str w8, [sp, #24] ; CHECK-NEXT: ret -attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" "target-cpu"="cyclone" } +attributes #0 = { noredzone "target-cpu"="cyclone" } diff --git a/test/CodeGen/AArch64/machine-outliner.mir b/test/CodeGen/AArch64/machine-outliner.mir index 708e2e428802..024bee47075b 100644 --- a/test/CodeGen/AArch64/machine-outliner.mir +++ b/test/CodeGen/AArch64/machine-outliner.mir @@ -1,6 +1,8 @@ -# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s --- | + @x = common global i32 0, align 4 + define void @baz() #0 { ret void } @@ -20,103 +22,139 @@ # - Create outlined functions # - Don't outline anything to do with LR or W30 # - Save LR when it's not available +# - Don't outline stack instructions when we might need to save + restore +# - Functions whose addresses are taken can still be outlined # # CHECK-LABEL: name: main + # CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]] -# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 -# CHECK-NEXT: STRHHroW %w16, 
%x9, %w30, 1, 1 -# CHECK-NEXT: %lr = ORRXri %xzr, 1 +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0 +# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1 +# CHECK-NEXT: $lr = ORRXri $xzr, 1 + # CHECK: BL @OUTLINED_FUNCTION_[[F0]] -# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 -# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK-NEXT: %lr = ORRXri %xzr, 1 +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0 +# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1 +# CHECK-NEXT: $lr = ORRXri $xzr, 1 + # CHECK: BL @OUTLINED_FUNCTION_[[F0]] -# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 -# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK-NEXT: %lr = ORRXri %xzr, 1 +# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16 +# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0 +# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1 +# CHECK-NEXT: $lr = ORRXri $xzr, 1 name: main +tracksRegLiveness: true body: | bb.0: - %sp = frame-setup SUBXri %sp, 16, 0 - %x9 = ORRXri %xzr, 1 - %w16 = ORRWri %wzr, 1 - %w30 = ORRWri %wzr, 1 - %lr = ORRXri %xzr, 1 - - %x20, %x19 = LDPXi %sp, 10 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - STRHHroW %w16, %x9, %w30, 1, 1 - %lr = ORRXri %xzr, 1 + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x9 = ADRP target-flags(aarch64-page) @bar + $x9 = ORRXri $xzr, 1 + $w16 = ORRWri $wzr, 1 + $w30 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 - %w3 = ORRWri %wzr, 1993 + $x20, $x19 = LDPXi $sp, 10 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + renamable $x9 = ADRP target-flags(aarch64-page) @x + $x16 = ADDXri $sp, 48, 0; + STRHHroW $w16, $x9, $w30, 1, 1 + $lr = ORRXri $xzr, 1 + $w3 = ORRWri $wzr, 1993 - %x20, %x19 = LDPXi %sp, 10 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = 
ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - STRHHroW %w16, %x9, %w30, 1, 1 - %lr = ORRXri %xzr, 1 + $x20, $x19 = LDPXi $sp, 10 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + renamable $x9 = ADRP target-flags(aarch64-page) @x + $x16 = ADDXri $sp, 48, 0; + STRHHroW $w16, $x9, $w30, 1, 1 + $lr = ORRXri $xzr, 1 - %w4 = ORRWri %wzr, 1994 + $w4 = ORRWri $wzr, 1994 - %x20, %x19 = LDPXi %sp, 10 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - %w16 = ORRWri %wzr, 1 - STRHHroW %w16, %x9, %w30, 1, 1 - %lr = ORRXri %xzr, 1 + $x20, $x19 = LDPXi $sp, 10 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + $w16 = ORRWri $wzr, 1 + renamable $x9 = ADRP target-flags(aarch64-page) @x + $x16 = ADDXri $sp, 48, 0; + STRHHroW $w16, $x9, $w30, 1, 1 + $lr = ORRXri $xzr, 1 - %sp = ADDXri %sp, 16, 0 - RET undef %lr + $sp = ADDXri $sp, 16, 0 + RET undef $lr ... --- # This test ensures that we can avoid saving LR when it's available. +# It also makes sure that KILL instructions don't impact outlining. 
# CHECK-LABEL: bb.1: -# CHECK-NOT: BL @baz, implicit-def dead %lr, implicit %sp -# CHECK: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]], implicit-def %lr, implicit %sp -# CHECK-NEXT: %w17 = ORRWri %wzr, 2 -# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1]], implicit-def %lr, implicit %sp -# CHECK-NEXT: %w8 = ORRWri %wzr, 0 +# CHECK-NOT: BL @baz, implicit-def dead $lr, implicit $sp +# CHECK: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]], implicit-def $lr, implicit $sp +# CHECK-NEXT: $w17 = ORRWri $wzr, 2 +# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1]], implicit-def $lr, implicit $sp +# CHECK-NEXT: $w8 = ORRWri $wzr, 0 +# CHECK-NOT: $w17 = KILL renamable $w17, implicit killed $w17 name: bar tracksRegLiveness: true body: | bb.0: - liveins: %w0, %lr, %w8 - %sp = frame-setup SUBXri %sp, 32, 0 - %fp = frame-setup ADDXri %sp, 16, 0 + liveins: $w0, $lr, $w8 + $sp = frame-setup SUBXri $sp, 32, 0 + $fp = frame-setup ADDXri $sp, 16, 0 bb.1: - BL @baz, implicit-def dead %lr, implicit %sp - %w17 = ORRWri %wzr, 1 - %w17 = ORRWri %wzr, 1 - %w17 = ORRWri %wzr, 1 - %w17 = ORRWri %wzr, 1 - BL @baz, implicit-def dead %lr, implicit %sp - %w17 = ORRWri %wzr, 2 - BL @baz, implicit-def dead %lr, implicit %sp - %w17 = ORRWri %wzr, 1 - %w17 = ORRWri %wzr, 1 - %w17 = ORRWri %wzr, 1 - %w17 = ORRWri %wzr, 1 - BL @baz, implicit-def dead %lr, implicit %sp - %w8 = ORRWri %wzr, 0 - + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w17 = KILL renamable $w17, implicit killed $w17 + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 2 + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + BL @baz, implicit-def dead $lr, implicit $sp + $w17 = ORRWri $wzr, 1 + $w17 = ORRWri $wzr, 1 + $w8 = ORRWri $wzr, 0 + bb.2: - %fp, %lr = LDPXi %sp, 2 - %sp = ADDXri %sp, 32, 0 - RET undef %lr + $w15 = 
ORRWri $wzr, 1 + $w15 = ORRWri $wzr, 1 + $w15 = ORRWri $wzr, 1 + $w15 = ORRWri $wzr, 1 + $x15 = ADDXri $sp, 48, 0; + $w9 = ORRWri $wzr, 0 + $w15 = ORRWri $wzr, 1 + $w15 = ORRWri $wzr, 1 + $w15 = ORRWri $wzr, 1 + $w15 = ORRWri $wzr, 1 + $x15 = ADDXri $sp, 48, 0; + $w8 = ORRWri $wzr, 0 + + bb.3: + $fp, $lr = LDPXi $sp, 2 + $sp = ADDXri $sp, 32, 0 + RET undef $lr ... --- @@ -124,8 +162,8 @@ name: baz tracksRegLiveness: true body: | bb.0: - liveins: %w0, %lr, %w8 - RET undef %lr + liveins: $w0, $lr, $w8 + RET undef $lr # CHECK-LABEL: name: OUTLINED_FUNCTION_{{[0-9]}} # CHECK=LABEL: name: OUTLINED_FUNCTION_{{[1-9]}} diff --git a/test/CodeGen/AArch64/machine-scheduler.mir b/test/CodeGen/AArch64/machine-scheduler.mir index 933afdb6da9b..33cb72f8be7d 100644 --- a/test/CodeGen/AArch64/machine-scheduler.mir +++ b/test/CodeGen/AArch64/machine-scheduler.mir @@ -18,18 +18,18 @@ --- # CHECK-LABEL: name: load_imp-def # CHECK: bb.0.entry: -# CHECK: LDRWui %x0, 0 -# CHECK: LDRWui %x0, 1 -# CHECK: STRWui %w1, %x0, 2 +# CHECK: LDRWui $x0, 0 +# CHECK: LDRWui $x0, 1 +# CHECK: STRWui $w1, $x0, 2 name: load_imp-def tracksRegLiveness: true body: | bb.0.entry: - liveins: %w1, %x0 - %w8 = LDRWui %x0, 1, implicit-def %x8 :: (load 4 from %ir.0) - STRWui killed %w1, %x0, 2 :: (store 4 into %ir.arrayidx1) - %w9 = LDRWui killed %x0, 0, implicit-def %x9 :: (load 4 from %ir.arrayidx19, align 8) - %x0 = ADDXrr killed %x9, killed %x8 - RET_ReallyLR implicit %x0 + liveins: $w1, $x0 + $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load 4 from %ir.0) + STRWui killed $w1, $x0, 2 :: (store 4 into %ir.arrayidx1) + $w9 = LDRWui killed $x0, 0, implicit-def $x9 :: (load 4 from %ir.arrayidx19, align 8) + $x0 = ADDXrr killed $x9, killed $x8 + RET_ReallyLR implicit $x0 ... 
diff --git a/test/CodeGen/AArch64/machine-sink-zr.mir b/test/CodeGen/AArch64/machine-sink-zr.mir index 2cf2bc488237..583ce40ad0e9 100644 --- a/test/CodeGen/AArch64/machine-sink-zr.mir +++ b/test/CodeGen/AArch64/machine-sink-zr.mir @@ -15,24 +15,24 @@ body: | ; Check that WZR copy is sunk into the loop preheader. ; CHECK-LABEL: name: sinkwzr ; CHECK-LABEL: bb.0: - ; CHECK-NOT: COPY %wzr + ; CHECK-NOT: COPY $wzr bb.0: - liveins: %w0 + liveins: $w0 - %0 = COPY %w0 - %1 = COPY %wzr + %0 = COPY $w0 + %1 = COPY $wzr CBZW %0, %bb.3 ; CHECK-LABEL: bb.1: - ; CHECK: COPY %wzr + ; CHECK: COPY $wzr bb.1: B %bb.2 bb.2: %2 = PHI %0, %bb.1, %4, %bb.2 - %w0 = COPY %1 - %3 = SUBSWri %2, 1, 0, implicit-def dead %nzcv + $w0 = COPY %1 + %3 = SUBSWri %2, 1, 0, implicit-def dead $nzcv %4 = COPY %3 CBZW %3, %bb.3 B %bb.2 diff --git a/test/CodeGen/AArch64/machine-zero-copy-remove.mir b/test/CodeGen/AArch64/machine-zero-copy-remove.mir index 3f25c0715574..854e4f8cad0a 100644 --- a/test/CodeGen/AArch64/machine-zero-copy-remove.mir +++ b/test/CodeGen/AArch64/machine-zero-copy-remove.mir @@ -1,565 +1,565 @@ # RUN: llc -mtriple=aarch64--linux-gnu -run-pass=aarch64-copyelim %s -verify-machineinstrs -o - | FileCheck %s --- # CHECK-LABEL: name: test1 -# CHECK: ANDSWri %w0, 1, implicit-def %nzcv +# CHECK: ANDSWri $w0, 1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test1 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - %w0 = ANDSWri %w0, 1, implicit-def %nzcv - STRWui killed %w0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ANDSWri $w0, 1, implicit-def $nzcv + STRWui killed $w0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %w0 = COPY %wzr - STRWui killed %w0, killed %x2, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x2, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test2 -# CHECK: ANDSXri %x0, 1, implicit-def %nzcv +# CHECK: ANDSXri $x0, 1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test2 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x0 = ANDSXri %x0, 1, implicit-def %nzcv - STRXui killed %x0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = ANDSXri $x0, 1, implicit-def $nzcv + STRXui killed $x0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %x0 = COPY %xzr - STRXui killed %x0, killed %x2, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x2, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test3 -# CHECK: ADDSWri %w0, 1, 0, implicit-def %nzcv +# CHECK: ADDSWri $w0, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test3 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - %w0 = ADDSWri %w0, 1, 0, implicit-def %nzcv - STRWui killed %w0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ADDSWri $w0, 1, 0, implicit-def $nzcv + STRWui killed $w0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %w0 = COPY %wzr - STRWui killed %w0, killed %x2, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x2, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test4 -# CHECK: ADDSXri %x0, 1, 0, implicit-def %nzcv +# CHECK: ADDSXri $x0, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test4 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x0 = ADDSXri %x0, 1, 0, implicit-def %nzcv - STRXui killed %x0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = ADDSXri $x0, 1, 0, implicit-def $nzcv + STRXui killed $x0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %x0 = COPY %xzr - STRXui killed %x0, killed %x2, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x2, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test5 -# CHECK: SUBSWri %w0, 1, 0, implicit-def %nzcv +# CHECK: SUBSWri $w0, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test5 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - %w0 = SUBSWri %w0, 1, 0, implicit-def %nzcv - STRWui killed %w0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = SUBSWri $w0, 1, 0, implicit-def $nzcv + STRWui killed $w0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %w0 = COPY %wzr - STRWui killed %w0, killed %x2, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x2, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test6 -# CHECK: SUBSXri %x0, 1, 0, implicit-def %nzcv +# CHECK: SUBSXri $x0, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test6 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x0 = SUBSXri %x0, 1, 0, implicit-def %nzcv - STRXui killed %x0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = SUBSXri $x0, 1, 0, implicit-def $nzcv + STRXui killed $x0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %x0 = COPY %xzr - STRXui killed %x0, killed %x2, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x2, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test7 -# CHECK: ADDSWrr %w0, %w1, implicit-def %nzcv +# CHECK: ADDSWrr $w0, $w1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test7 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = ADDSWrr %w0, %w1, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ADDSWrr $w0, $w1, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test8 -# CHECK: ADDSXrr %x0, %x1, implicit-def %nzcv +# CHECK: ADDSXrr $x0, $x1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test8 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 - %x0 = ADDSXrr %x0, %x1, implicit-def %nzcv - STRXui killed %x0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = ADDSXrr $x0, $x1, implicit-def $nzcv + STRXui killed $x0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %x0 = COPY %xzr - STRXui killed %x0, killed %x3, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x3, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test9 -# CHECK: ANDSWrr %w0, %w1, implicit-def %nzcv +# CHECK: ANDSWrr $w0, $w1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test9 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = ANDSWrr %w0, %w1, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ANDSWrr $w0, $w1, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test10 -# CHECK: ANDSXrr %x0, %x1, implicit-def %nzcv +# CHECK: ANDSXrr $x0, $x1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test10 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 - %x0 = ANDSXrr %x0, %x1, implicit-def %nzcv - STRXui killed %x0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = ANDSXrr $x0, $x1, implicit-def $nzcv + STRXui killed $x0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %x0 = COPY %xzr - STRXui killed %x0, killed %x3, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x3, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test11 -# CHECK: BICSWrr %w0, %w1, implicit-def %nzcv +# CHECK: BICSWrr $w0, $w1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test11 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = BICSWrr %w0, %w1, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = BICSWrr $w0, $w1, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test12 -# CHECK: BICSXrr %x0, %x1, implicit-def %nzcv +# CHECK: BICSXrr $x0, $x1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test12 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 - %x0 = BICSXrr %x0, %x1, implicit-def %nzcv - STRXui killed %x0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = BICSXrr $x0, $x1, implicit-def $nzcv + STRXui killed $x0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %x0 = COPY %xzr - STRXui killed %x0, killed %x3, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x3, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test13 -# CHECK: SUBSWrr %w0, %w1, implicit-def %nzcv +# CHECK: SUBSWrr $w0, $w1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test13 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = SUBSWrr %w0, %w1, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = SUBSWrr $w0, $w1, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test14 -# CHECK: SUBSXrr %x0, %x1, implicit-def %nzcv +# CHECK: SUBSXrr $x0, $x1, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test14 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 - %x0 = SUBSXrr %x0, %x1, implicit-def %nzcv - STRXui killed %x0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = SUBSXrr $x0, $x1, implicit-def $nzcv + STRXui killed $x0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %x0 = COPY %xzr - STRXui killed %x0, killed %x3, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x3, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test15 -# CHECK: ADDSWrs %w0, %w1, 0, implicit-def %nzcv +# CHECK: ADDSWrs $w0, $w1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test15 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = ADDSWrs %w0, %w1, 0, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ADDSWrs $w0, $w1, 0, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test16 -# CHECK: ADDSXrs %x0, %x1, 0, implicit-def %nzcv +# CHECK: ADDSXrs $x0, $x1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test16 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 - %x0 = ADDSXrs %x0, %x1, 0, implicit-def %nzcv - STRXui killed %x0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = ADDSXrs $x0, $x1, 0, implicit-def $nzcv + STRXui killed $x0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %x0 = COPY %xzr - STRXui killed %x0, killed %x3, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x3, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test17 -# CHECK: ANDSWrs %w0, %w1, 0, implicit-def %nzcv +# CHECK: ANDSWrs $w0, $w1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test17 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = ANDSWrs %w0, %w1, 0, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ANDSWrs $w0, $w1, 0, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# CHECK-LABEL: name: test18 -# CHECK: ANDSXrs %x0, %x1, 0, implicit-def %nzcv +# CHECK: ANDSXrs $x0, $x1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %xzr +# CHECK-NOT: COPY $xzr name: test18 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2, %x3 + liveins: $x0, $x1, $x2, $x3 - %x0 = ANDSXrs %x0, %x1, 0, implicit-def %nzcv - STRXui killed %x0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = ANDSXrs $x0, $x1, 0, implicit-def $nzcv + STRXui killed $x0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %x0 = COPY %xzr - STRXui killed %x0, killed %x3, 0 + $x0 = COPY $xzr + STRXui killed $x0, killed $x3, 0 bb.2: RET_ReallyLR ... # CHECK-LABEL: name: test19 -# CHECK: BICSWrs %w0, %w1, 0, implicit-def %nzcv +# CHECK: BICSWrs $w0, $w1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: COPY %wzr +# CHECK-NOT: COPY $wzr name: test19 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %w1, %x2, %x3 + liveins: $w0, $w1, $x2, $x3 - %w0 = BICSWrs %w0, %w1, 0, implicit-def %nzcv - STRWui killed %w0, killed %x2, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = BICSWrs $w0, $w1, 0, implicit-def $nzcv + STRWui killed $w0, killed $x2, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x3 + liveins: $x3 - %w0 = COPY %wzr - STRWui killed %w0, killed %x3, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x3, 0 bb.2: RET_ReallyLR ... 
# Unicorn test - we can remove a redundant copy and a redundant mov # CHECK-LABEL: name: test20 -# CHECK: SUBSWri %w1, 1, 0, implicit-def %nzcv +# CHECK: SUBSWri $w1, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK-NOT: %w0 = COPY %wzr -# CHECK-NOT: %w1 = MOVi32imm 1 +# CHECK-NOT: $w0 = COPY $wzr +# CHECK-NOT: $w1 = MOVi32imm 1 name: test20 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w1, %x2 + liveins: $w1, $x2 - %w0 = SUBSWri %w1, 1, 0, implicit-def %nzcv - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = SUBSWri $w1, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %w0 = COPY %wzr - %w1 = MOVi32imm 1 - STRWui killed %w0, %x2, 0 - STRWui killed %w1, killed %x2, 1 + $w0 = COPY $wzr + $w1 = MOVi32imm 1 + STRWui killed $w0, $x2, 0 + STRWui killed $w1, killed $x2, 1 bb.2: RET_ReallyLR ... -# Negative test - MOVi32imm clobbers %w0 +# Negative test - MOVi32imm clobbers $w0 # CHECK-LABEL: name: test21 -# CHECK: ANDSWri %w0, 1, implicit-def %nzcv +# CHECK: ANDSWri $w0, 1, implicit-def $nzcv # CHECK: bb.1: -# CHECK: %w0 = COPY %wzr +# CHECK: $w0 = COPY $wzr name: test21 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - %w0 = ANDSWri %w0, 1, implicit-def %nzcv - STRWui killed %w0, %x1, 0 - %w0 = MOVi32imm -1 - STRWui killed %w0, killed %x1, 1 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ANDSWri $w0, 1, implicit-def $nzcv + STRWui killed $w0, $x1, 0 + $w0 = MOVi32imm -1 + STRWui killed $w0, killed $x1, 1 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %w0 = COPY %wzr - STRWui killed %w0, killed %x2, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x2, 0 bb.2: RET_ReallyLR ... 
# Negative test - SUBSXri self-clobbers x0, so MOVi64imm can't be removed # CHECK-LABEL: name: test22 -# CHECK: SUBSXri %x0, 1, 0, implicit-def %nzcv +# CHECK: SUBSXri $x0, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK: %x0 = MOVi64imm 1 +# CHECK: $x0 = MOVi64imm 1 name: test22 tracksRegLiveness: true body: | bb.0.entry: - liveins: %x0, %x1, %x2 + liveins: $x0, $x1, $x2 - %x0 = SUBSXri %x0, 1, 0, implicit-def %nzcv - STRXui killed %x0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $x0 = SUBSXri $x0, 1, 0, implicit-def $nzcv + STRXui killed $x0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %x0 = MOVi64imm 1 - STRXui killed %x0, killed %x2, 0 + $x0 = MOVi64imm 1 + STRXui killed $x0, killed $x2, 0 bb.2: RET_ReallyLR ... # Negative test - bb.1 has multiple preds # CHECK-LABEL: name: test23 -# CHECK: ADDSWri %w0, 1, 0, implicit-def %nzcv +# CHECK: ADDSWri $w0, 1, 0, implicit-def $nzcv # CHECK: bb.1: -# CHECK: COPY %wzr +# CHECK: COPY $wzr name: test23 tracksRegLiveness: true body: | bb.0.entry: - liveins: %w0, %x1, %x2 + liveins: $w0, $x1, $x2 - %w0 = ADDSWri %w0, 1, 0, implicit-def %nzcv - STRWui killed %w0, killed %x1, 0 - Bcc 1, %bb.2, implicit killed %nzcv + $w0 = ADDSWri $w0, 1, 0, implicit-def $nzcv + STRWui killed $w0, killed $x1, 0 + Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 bb.3: B %bb.1 bb.1: - liveins: %x2 + liveins: $x2 - %w0 = COPY %wzr - STRWui killed %w0, killed %x2, 0 + $w0 = COPY $wzr + STRWui killed $w0, killed $x2, 0 bb.2: RET_ReallyLR diff --git a/test/CodeGen/AArch64/macho-trap.ll b/test/CodeGen/AArch64/macho-trap.ll new file mode 100644 index 000000000000..88700f837fed --- /dev/null +++ b/test/CodeGen/AArch64/macho-trap.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=aarch64-apple-ios7.0 %s -o - | FileCheck %s + +define void @test_unreachable() { +; CHECK-LABEL: test_unreachable: +; CHECK: brk #0x1 + unreachable +} diff --git a/test/CodeGen/AArch64/macro-fusion-last.mir 
b/test/CodeGen/AArch64/macro-fusion-last.mir new file mode 100644 index 000000000000..14937a4794e9 --- /dev/null +++ b/test/CodeGen/AArch64/macro-fusion-last.mir @@ -0,0 +1,28 @@ +# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+arith-bcc-fusion -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION +# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-arith-bcc-fusion -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION +# Make sure the last instruction is correctly macro-fused when scheduling +# top-down (post-ra). +--- +# CHECK-LABEL: name: fuse_last +# CHECK: $x1 = LDRXui $x0, 0 +# NOFUSION: $xzr = SUBSXri killed $x2, 0, 0, implicit-def $nzcv +# CHECK: STRXui killed $x0, killed $x1, 0 +# FUSION: $xzr = SUBSXri killed $x2, 0, 0, implicit-def $nzcv +# CHECK: Bcc 1, %bb.1, implicit killed $nzcv +name: fuse_last +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x2 + + $x1 = LDRXui $x0, 0 + ; There is latency between these two instructions tempting the scheduler to + ; move the SUBSXri in between them. However doing so breaks macro fusion. + STRXui $x0, $x1, 0 + + $xzr = SUBSXri $x2, 0, 0, implicit-def $nzcv + Bcc 1, %bb.1, implicit killed $nzcv + + bb.1: + RET_ReallyLR implicit undef $x0 +... 
diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll index 9a0179ecc1b8..612eba8f2ceb 100644 --- a/test/CodeGen/AArch64/max-jump-table.ll +++ b/test/CodeGen/AArch64/max-jump-table.ll @@ -2,6 +2,7 @@ ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t +; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t declare void @ext(i32) @@ -41,7 +42,8 @@ entry: ; CHECKM1-NEXT: %jump-table.0: ; CHECKM1-SAME: %jump-table.1 ; CHECKM1-NOT: %jump-table.2: -; CHEC-NEXT: Function Live Ins: +; CHECKM3-NEXT: %jump-table.0: +; CHECKM3-NOT: %jump-table.1: bb1: tail call void @ext(i32 0) br label %return bb2: tail call void @ext(i32 2) br label %return @@ -78,10 +80,15 @@ entry: ; CHECK-LABEL: function jt2: ; CHECK-NEXT: Jump Tables: ; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} +; CHECK0-NOT: %jump-table.1 ; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} +; CHECK4-NOT: %jump-table.1 ; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} +; CHECK8-NOT: %jump-table.1 ; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} -; CHEC-NEXT: Function Live Ins: +; CHECKM1-NOT: %jump-table.1 +; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} +; CHECKM3-NOT: %jump-table.1 bb1: tail call void 
@ext(i32 1) br label %return bb2: tail call void @ext(i32 2) br label %return diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll index 7e6ec36104ab..8b91b8431087 100644 --- a/test/CodeGen/AArch64/memcpy-f128.ll +++ b/test/CodeGen/AArch64/memcpy-f128.ll @@ -12,8 +12,8 @@ define void @test1() { ; CHECK: str q0 ; CHECK: ret entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 undef, i8* align 8 bitcast (%structA* @stubA to i8*), i64 48, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll index 4f2af9ed7e65..3b68cbb8c2af 100644 --- a/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/test/CodeGen/AArch64/merge-store-dependency.ll @@ -11,10 +11,10 @@ entry: ; A53: mov [[DATA:w[0-9]+]], w1 ; A53: str q{{[0-9]+}}, {{.*}} ; A53: str q{{[0-9]+}}, {{.*}} -; A53: str [[DATA]], {{.*}} +; A53: str w1, {{.*}} %0 = bitcast %struct1* %fde to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false) %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4 store i16 256, i16* %state, align 8 %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2 @@ -58,6 +58,6 @@ exit: ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) declare i32 @fcntl(i32, i32, ...) 
declare noalias i8* @foo() diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll index 1d26e4a42b17..5e4e74321704 100644 --- a/test/CodeGen/AArch64/merge-store.ll +++ b/test/CodeGen/AArch64/merge-store.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s --check-prefix=SPLITTING --check-prefix=CHECK ; RUN: llc < %s -mtriple=aarch64-eabi -mattr=-slow-misaligned-128store | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK @g0 = external global <3 x float>, align 16 @@ -44,9 +44,9 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) { ; FIXME: Ideally we would like to use a generic target for this test, but this relies ; on suppressing store pairs. -; CYCLONE-LABEL: merge_vec_extract_stores -; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8 -; CYCLONE-NEXT: str d0, [x0, #24] -; CYCLONE-NEXT: str d1, [x0, #32] -; CYCLONE-NEXT: ret +; SPLITTING-LABEL: merge_vec_extract_stores +; SPLITTING: ext v1.16b, v0.16b, v0.16b, #8 +; SPLITTING-NEXT: str d0, [x0, #24] +; SPLITTING-NEXT: str d1, [x0, #32] +; SPLITTING-NEXT: ret } diff --git a/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll b/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll index 74aeaf75d037..fbaef9cc0751 100644 --- a/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll +++ b/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll @@ -4,20 +4,18 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios10.0.0" ; PR33475 - Expect 64-bit operations as 128-operations are not legal +; However, we can generate a paired 64-bit loads and stores, without using +; floating point registers. 
; CHECK-LABEL: pr33475 -; CHECK-DAG: ldr [[R0:x[0-9]+]], [x1] -; CHECK-DAG: str [[R0]], [x0] -; CHECK-DAG: ldr [[R1:x[0-9]+]], [x1, #8] -; CHECK-DAG: str [[R1]], [x0, #8] -; CHECK-DAG: ldr [[R2:x[0-9]+]], [x1, #16] -; CHECK-DAG: str [[R2]], [x0, #16] -; CHECK-DAG: ldr [[R3:x[0-9]+]], [x1, #24] -; CHECK-DAG: str [[R3]], [x0, #24] +; CHECK-DAG: ldp [[R0:x[0-9]+]], [[R0:x[0-9]+]], [x1, #16] +; CHECK-DAG: ldp [[R0:x[0-9]+]], [[R0:x[0-9]+]], [x1] +; CHECK-DAG: stp [[R0:x[0-9]+]], [[R0:x[0-9]+]], [x0, #16] +; CHECK-DAG: stp [[R0:x[0-9]+]], [[R0:x[0-9]+]], [x0] define void @pr33475(i8* %p0, i8* %p1) noimplicitfloat { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p0, i8* %p1, i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p0, i8* align 4 %p1, i64 32, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/test/CodeGen/AArch64/minmax-of-minmax.ll b/test/CodeGen/AArch64/minmax-of-minmax.ll new file mode 100644 index 000000000000..9257832d4c4b --- /dev/null +++ b/test/CodeGen/AArch64/minmax-of-minmax.ll @@ -0,0 +1,2441 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +; There are 4 commuted variants (abbc/abcb/bcab/bcba) * +; 4 predicate variants ([*][lg][te]) * +; 4 min/max flavors (smin/smax/umin/umax) * +; 2 notted variants +; = 128 tests + +define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %a, %c + %r = 
select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc_swap_pred: +; 
CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: 
ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select 
<4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> 
%min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb: +; CHECK: // %bb.0: +; 
CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = 
select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> 
%a + %cmp_ca = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> 
@smax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; 
CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = 
select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %c, %a 
+ %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x 
i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; 
CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 
x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %a, %c + %r = select <4 x 
i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: 
umax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, 
v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, 
%a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> 
%cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, 
i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x 
i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x 
i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc_eq_pred: 
+; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, 
v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c 
= xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt 
<4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x 
i32> %r +} + +define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; 
CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x 
i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x 
i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp 
sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 
x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, 
v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 
-1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + 
%cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc 
+ ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: 
umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, 
i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = 
select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> 
%r +} + +define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: 
mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, 
i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x 
i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %z, %x + %r = 
select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: 
notted_umax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, 
v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b 
= xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + diff --git a/test/CodeGen/AArch64/misched-fusion-addr.ll b/test/CodeGen/AArch64/misched-fusion-addr.ll new file mode 100644 index 000000000000..82da67842822 --- /dev/null +++ b/test/CodeGen/AArch64/misched-fusion-addr.ll @@ -0,0 +1,115 @@ +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s + +target triple = "aarch64-unknown" + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 +@var_128bit = global i128 0 +@var_half = global half 0.0 +@var_float = global float 0.0 +@var_double = global double 0.0 +@var_double2 = global <2 x double> <double 0.0, double 0.0> + +define void @ldst_8bit() { + %val8 = load volatile i8, i8* @var_8bit + %ext = zext i8 %val8 to i64 + %add = add i64 %ext, 1 + %val16 = trunc i64 %add to i16 + store volatile i16 %val16, i16* @var_16bit + ret void + +; CHECK-LABEL: ldst_8bit: +; CHECK: adrp [[RB:x[0-9]+]], var_8bit +; CHECK-NEXT: ldrb {{w[0-9]+}}, {{\[}}[[RB]], {{#?}}:lo12:var_8bit{{\]}} +; CHECK: adrp [[RH:x[0-9]+]], var_16bit +; CHECK-NEXT: strh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}} +} + +define void @ldst_16bit() { + %val16 = load volatile i16, i16* @var_16bit + %ext = zext i16 %val16 to i64 + %add = add i64 %ext, 1 + %val32 = trunc i64 %add to i32 + store volatile i32 %val32, i32* @var_32bit + ret void + +; CHECK-LABEL: ldst_16bit: +; CHECK: adrp 
[[RH:x[0-9]+]], var_16bit +; CHECK-NEXT: ldrh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}} +; CHECK: adrp [[RW:x[0-9]+]], var_32bit +; CHECK-NEXT: str {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}} +} + +define void @ldst_32bit() { + %val32 = load volatile i32, i32* @var_32bit + %ext = zext i32 %val32 to i64 + %val64 = add i64 %ext, 1 + store volatile i64 %val64, i64* @var_64bit + ret void + +; CHECK-LABEL: ldst_32bit: +; CHECK: adrp [[RW:x[0-9]+]], var_32bit +; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}} +; CHECK: adrp [[RL:x[0-9]+]], var_64bit +; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}} +} + +define void @ldst_64bit() { + %val64 = load volatile i64, i64* @var_64bit + %ext = zext i64 %val64 to i128 + %val128 = add i128 %ext, 1 + store volatile i128 %val128, i128* @var_128bit + ret void + +; CHECK-LABEL: ldst_64bit: +; CHECK: adrp [[RL:x[0-9]+]], var_64bit +; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}} +; CHECK: adrp [[RQ:x[0-9]+]], var_128bit +; CHECK-NEXT: add {{x[0-9]+}}, [[RQ]], {{#?}}:lo12:var_128bit +} + +define void @ldst_half() { + %valh = load volatile half, half* @var_half + %valf = fpext half %valh to float + store volatile float %valf, float* @var_float + ret void + +; CHECK-LABEL: ldst_half: +; CHECK: adrp [[RH:x[0-9]+]], var_half +; CHECK-NEXT: ldr {{h[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_half{{\]}} +; CHECK: adrp [[RF:x[0-9]+]], var_float +; CHECK-NEXT: str {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}} +} + +define void @ldst_float() { + %valf = load volatile float, float* @var_float + %vald = fpext float %valf to double + store volatile double %vald, double* @var_double + ret void + +; CHECK-LABEL: ldst_float: +; CHECK: adrp [[RF:x[0-9]+]], var_float +; CHECK-NEXT: ldr {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}} +; CHECK: adrp [[RD:x[0-9]+]], var_double +; CHECK-NEXT: str {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}} 
+} + +define void @ldst_double() { + %valf = load volatile float, float* @var_float + %vale = fpext float %valf to double + %vald = load volatile double, double* @var_double + %vald1 = insertelement <2 x double> undef, double %vald, i32 0 + %vald2 = insertelement <2 x double> %vald1, double %vale, i32 1 + store volatile <2 x double> %vald2, <2 x double>* @var_double2 + ret void + +; CHECK-LABEL: ldst_double: +; CHECK: adrp [[RD:x[0-9]+]], var_double +; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}} +; CHECK: adrp [[RQ:x[0-9]+]], var_double2 +; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}} +} diff --git a/test/CodeGen/AArch64/misched-fusion-aes.ll b/test/CodeGen/AArch64/misched-fusion-aes.ll index 7901a20ca65f..5e90a80e247a 100644 --- a/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -6,6 +6,8 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m2 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k) declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d) diff --git a/test/CodeGen/AArch64/misched-fusion-csel.ll b/test/CodeGen/AArch64/misched-fusion-csel.ll new file mode 100644 index 000000000000..d36d79b87232 --- /dev/null +++ b/test/CodeGen/AArch64/misched-fusion-csel.ll @@ -0,0 +1,31 @@ +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s + +target triple = "aarch64-unknown" + +define i32 @test_sub_cselw(i32 %a0, i32 %a1, i32 %a2) { +entry: + %v0 = sub 
i32 %a0, 13 + %cond = icmp eq i32 %v0, 0 + %v1 = add i32 %a1, 7 + %v2 = select i1 %cond, i32 %a0, i32 %v1 + ret i32 %v2 + +; CHECK-LABEL: test_sub_cselw: +; CHECK: cmp {{w[0-9]}}, #13 +; CHECK-NEXT: csel {{w[0-9]}} +} + +define i64 @test_sub_cselx(i64 %a0, i64 %a1, i64 %a2) { +entry: + %v0 = sub i64 %a0, 13 + %cond = icmp eq i64 %v0, 0 + %v1 = add i64 %a1, 7 + %v2 = select i1 %cond, i64 %a0, i64 %v1 + ret i64 %v2 + +; CHECK-LABEL: test_sub_cselx: +; CHECK: cmp {{x[0-9]}}, #13 +; CHECK-NEXT: csel {{x[0-9]}} +} diff --git a/test/CodeGen/AArch64/misched-fusion-lit.ll b/test/CodeGen/AArch64/misched-fusion-lit.ll index 45aa67ef1d54..49ae920d0107 100644 --- a/test/CodeGen/AArch64/misched-fusion-lit.ll +++ b/test/CodeGen/AArch64/misched-fusion-lit.ll @@ -1,6 +1,8 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE @g = common local_unnamed_addr global i8* null, align 8 diff --git a/test/CodeGen/AArch64/misched-stp.ll b/test/CodeGen/AArch64/misched-stp.ll index 1c9ea68834c2..370bf3fb9643 100644 --- a/test/CodeGen/AArch64/misched-stp.ll +++ b/test/CodeGen/AArch64/misched-stp.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa,+slow-misaligned-128store -enable-misched -verify-misched -o - | FileCheck %s ; Tests to check that the scheduler dependencies 
derived from alias analysis are ; correct when we have loads that have been split up so that they can later be @@ -30,7 +30,7 @@ entry: ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) %struct.tree_common = type { i8*, i8*, i32 } ; CHECK-LABEL: test_zero @@ -41,7 +41,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) { entry: %0 = bitcast %struct.tree_common* %t to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false) %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2 store i32 %code, i32* %code1, align 8 %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 diff --git a/test/CodeGen/AArch64/mlicm-stack-write-check.mir b/test/CodeGen/AArch64/mlicm-stack-write-check.mir new file mode 100644 index 000000000000..c5d1a8f81fcf --- /dev/null +++ b/test/CodeGen/AArch64/mlicm-stack-write-check.mir @@ -0,0 +1,32 @@ +# RUN: llc -mtriple=aarch64 -run-pass machinelicm -verify-machineinstrs -o - %s | FileCheck %s +--- +name: test +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } +stack: + - { id: 0, size: 8, type: spill-slot } +body: | + bb.0: + ; CHECK-LABEL: name: test + ; CHECK-LABEL: bb.0: + ; CHECK: $x2 = LDRXui %stack.0, 0 + liveins: $x0, $x1, $x2 + B %bb.1 + + bb.1: + ; CHECK-LABEL: bb.1: + ; CHECK-NOT: $x2 = LDRXui %stack.0, 0 + liveins: $x0 + DBG_VALUE %stack.0, 0 + $x2 = LDRXui %stack.0, 0 :: (load 8 from %stack.0) + $x0 = ADDXrr $x0, $x2 + $xzr = SUBSXri $x0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + liveins: $x0 + %0 = COPY $x0 + %0 = COPY $x0 ; Force isSSA = false. +... 
diff --git a/test/CodeGen/AArch64/movimm-wzr.mir b/test/CodeGen/AArch64/movimm-wzr.mir index 60e9bfa03a96..50405c79973e 100644 --- a/test/CodeGen/AArch64/movimm-wzr.mir +++ b/test/CodeGen/AArch64/movimm-wzr.mir @@ -32,11 +32,11 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0 (%ir-block.0): - %wzr = MOVi32imm 42 - %xzr = MOVi64imm 42 - RET_ReallyLR implicit killed %w0 + $wzr = MOVi32imm 42 + $xzr = MOVi64imm 42 + RET_ReallyLR implicit killed $w0 ... # CHECK: bb.0 -# CHECK-NEXT: RET undef %lr +# CHECK-NEXT: RET undef $lr diff --git a/test/CodeGen/AArch64/neg-imm.ll b/test/CodeGen/AArch64/neg-imm.ll index 46bded78cc59..ee95f37c203c 100644 --- a/test/CodeGen/AArch64/neg-imm.ll +++ b/test/CodeGen/AArch64/neg-imm.ll @@ -7,8 +7,8 @@ declare void @foo(i32) define void @test(i32 %px) { ; CHECK_LABEL: test: ; CHECK_LABEL: %entry -; CHECK: subs -; CHECK-NEXT: csel +; CHECK: subs [[REG0:w[0-9]+]], +; CHECK: csel {{w[0-9]+}}, wzr, [[REG0]] entry: %sub = add nsw i32 %px, -1 %cmp = icmp slt i32 %px, 1 diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 6497856c7d36..8af8fd2be94b 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -46,7 +46,8 @@ define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) { define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: bsl8xi8_const: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff0000ffff +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 > %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 > %tmp3 = or <8 x i8> %tmp1, %tmp2 @@ -55,6 +56,7 @@ define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) { define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: bsl16xi8_const: +; CHECK: movi 
{{v[0-9]+}}.2d, #0x{{0*}}ffffffff ; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 > %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -64,7 +66,7 @@ define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: orn8xi8: -; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = or <8 x i8> %a, %tmp1 ret <8 x i8> %tmp2 @@ -72,7 +74,7 @@ define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) { define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: orn16xi8: -; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = or <16 x i8> %a, %tmp1 ret <16 x i8> %tmp2 @@ -80,7 +82,7 @@ define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) { define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: bic8xi8: -; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <8 x i8> %a, %tmp1 ret <8 x i8> %tmp2 @@ -88,7 +90,7 @@ define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) { define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: bic16xi8: -; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <16 x i8> %a, %tmp1 ret <16 x i8> %tmp2 @@ -96,175 +98,175 @@ define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) { define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) { ; CHECK-LABEL: orrimm2s_lsl0: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = or <2 x i32> %a, < i32 255, i32 255> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) { ; CHECK-LABEL: orrimm2s_lsl8: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = or <2 x i32> %a, < i32 65280, i32 65280> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) { ; CHECK-LABEL: orrimm2s_lsl16: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) { ; CHECK-LABEL: orrimm2s_lsl24: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 %tmp1 = or <2 x i32> %a, < i32 4278190080, i32 4278190080> ret <2 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) { ; CHECK-LABEL: orrimm4s_lsl0: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) { ; CHECK-LABEL: orrimm4s_lsl8: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) { ; CHECK-LABEL: orrimm4s_lsl16: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 
16711680, i32 16711680> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) { ; CHECK-LABEL: orrimm4s_lsl24: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 %tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080> ret <4 x i32> %tmp1 } define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) { ; CHECK-LABEL: orrimm4h_lsl0: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 > ret <4 x i16> %tmp1 } define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) { ; CHECK-LABEL: orrimm4h_lsl8: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 > ret <4 x i16> %tmp1 } define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) { ; CHECK-LABEL: orrimm8h_lsl0: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > ret <8 x i16> %tmp1 } define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) { ; CHECK-LABEL: orrimm8h_lsl8: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > ret <8 x i16> %tmp1 } define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) { ; CHECK-LABEL: bicimm2s_lsl0: -; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}} +; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}} %tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) { ; CHECK-LABEL: bicimm2s_lsl8: -; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #8 %tmp1 = and <2 x i32> %a, < i32 4294963199, i32 4294963199 > ret <2 x i32> %tmp1 } define 
<2 x i32> @bicimm2s_lsl16(<2 x i32> %a) { ; CHECK-LABEL: bicimm2s_lsl16: -; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #16 %tmp1 = and <2 x i32> %a, < i32 4293918719, i32 4293918719 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) { ; CHECK-LABEL: bicimm2s_lsl124: -; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #24 %tmp1 = and <2 x i32> %a, < i32 4026531839, i32 4026531839> ret <2 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) { ; CHECK-LABEL: bicimm4s_lsl0: -; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}} +; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}} %tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) { ; CHECK-LABEL: bicimm4s_lsl8: -; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #8 %tmp1 = and <4 x i32> %a, < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) { ; CHECK-LABEL: bicimm4s_lsl16: -; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #16 %tmp1 = and <4 x i32> %a, < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) { ; CHECK-LABEL: bicimm4s_lsl124: -; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #24 %tmp1 = and <4 x i32> %a, < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839> ret <4 x i32> %tmp1 } define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) { ; CHECK-LABEL: bicimm4h_lsl0_a: -; CHECK: bic {{v[0-9]+}}.4h, #{{0x10|16}} +; CHECK: bic {{v[0-9]+}}.4h, #{{0x10|16}} %tmp1 = and <4 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 > ret <4 x i16> %tmp1 } define <4 x 
i16> @bicimm4h_lsl0_b(<4 x i16> %a) { ; CHECK-LABEL: bicimm4h_lsl0_b: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 > ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) { ; CHECK-LABEL: bicimm4h_lsl8_a: -; CHECK: bic {{v[0-9]+}}.4h, #{{0x10|16}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4h, #{{0x10|16}}, lsl #8 %tmp1 = and <4 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199> ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) { ; CHECK-LABEL: bicimm4h_lsl8_b: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255> ret <4 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) { ; CHECK-LABEL: bicimm8h_lsl0_a: -; CHECK: bic {{v[0-9]+}}.8h, #{{0x10|16}} +; CHECK: bic {{v[0-9]+}}.8h, #{{0x10|16}} %tmp1 = and <8 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 > ret <8 x i16> %tmp1 @@ -272,14 +274,14 @@ define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) { define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) { ; CHECK-LABEL: bicimm8h_lsl0_b: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) { ; CHECK-LABEL: bicimm8h_lsl8_a: -; CHECK: bic {{v[0-9]+}}.8h, #{{0x10|16}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.8h, #{{0x10|16}}, lsl #8 %tmp1 = and <8 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199> ret <8 x i16> %tmp1 @@ -287,7 +289,7 @@ define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) { define <8 x i16> 
@bicimm8h_lsl8_b(<8 x i16> %a) { ; CHECK-LABEL: bicimm8h_lsl8_b: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> ret <8 x i16> %tmp1 } @@ -421,7 +423,7 @@ define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) { define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: bic2xi32: -; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 > %tmp2 = and <2 x i32> %a, %tmp1 ret <2 x i32> %tmp2 @@ -429,7 +431,7 @@ define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) { define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: bic4xi16: -; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 > %tmp2 = and <4 x i16> %a, %tmp1 ret <4 x i16> %tmp2 @@ -437,7 +439,7 @@ define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) { define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: bic1xi64: -; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %b, < i64 -1> %tmp2 = and <1 x i64> %a, %tmp1 ret <1 x i64> %tmp2 @@ -445,7 +447,7 @@ define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) { define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: bic4xi32: -; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = and <4 x i32> %a, %tmp1 ret <4 x i32> %tmp2 @@ -453,7 +455,7 @@ define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) { define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: bic8xi16: -; CHECK: 
bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp2 = and <8 x i16> %a, %tmp1 ret <8 x i16> %tmp2 @@ -461,7 +463,7 @@ define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) { define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: bic2xi64: -; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1> %tmp2 = and <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 @@ -469,7 +471,7 @@ define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) { define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: orn2xi32: -; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 > %tmp2 = or <2 x i32> %a, %tmp1 ret <2 x i32> %tmp2 @@ -477,7 +479,7 @@ define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) { define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: orn4xi16: -; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 > %tmp2 = or <4 x i16> %a, %tmp1 ret <4 x i16> %tmp2 @@ -485,7 +487,7 @@ define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) { define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: orn1xi64: -; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %b, < i64 -1> %tmp2 = or <1 x i64> %a, %tmp1 ret <1 x i64> %tmp2 @@ -493,7 +495,7 @@ define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) { define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: orn4xi32: -; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b +; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = or <4 x i32> %a, %tmp1 ret <4 x i32> %tmp2 @@ -501,7 +503,7 @@ define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) { define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: orn8xi16: -; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp2 = or <8 x i16> %a, %tmp1 ret <8 x i16> %tmp2 @@ -509,7 +511,7 @@ define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) { define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: orn2xi64: -; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1> %tmp2 = or <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 @@ -517,7 +519,8 @@ define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) { define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: bsl2xi32_const: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffffffff +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <2 x i32> %a, < i32 -1, i32 0 > %tmp2 = and <2 x i32> %b, < i32 0, i32 -1 > %tmp3 = or <2 x i32> %tmp1, %tmp2 @@ -527,7 +530,8 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: bsl4xi16_const: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff0000ffff +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 > %tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 > %tmp3 = or <4 x i16> %tmp1, %tmp2 @@ -536,16 +540,18 @@ define <4 x 
i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: bsl1xi64_const: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp1 = and <1 x i64> %a, < i64 -16 > - %tmp2 = and <1 x i64> %b, < i64 15 > +; CHECK: movi {{d[0-9]+}}, #0xffffffffffffff00 +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp1 = and <1 x i64> %a, < i64 -256 > + %tmp2 = and <1 x i64> %b, < i64 255 > %tmp3 = or <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: bsl4xi32_const: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: movi {{v[0-9]+}}.2d, #0x{{0*}}ffffffff +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 > %tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 > %tmp3 = or <4 x i32> %tmp1, %tmp2 @@ -554,7 +560,8 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: bsl8xi16_const: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: movi {{v[0-9]+}}.2d, #0x{{0*}}ffffffff +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 > %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 > %tmp3 = or <8 x i16> %tmp1, %tmp2 @@ -563,7 +570,7 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: bsl2xi64_const: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <2 x i64> %a, < i64 -1, i64 0 > %tmp2 = and <2 x i64> %b, < i64 0, i64 -1 > %tmp3 = or <2 x i64> %tmp1, %tmp2 @@ -573,7 +580,7 @@ define <2 x i64> 
@bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; CHECK-LABEL: bsl8xi8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <8 x i8> %v1, %v2 %2 = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %3 = and <8 x i8> %2, %v3 @@ -583,7 +590,7 @@ define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { ; CHECK-LABEL: bsl4xi16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <4 x i16> %v1, %v2 %2 = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1> %3 = and <4 x i16> %2, %v3 @@ -593,7 +600,7 @@ define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: bsl2xi32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <2 x i32> %v1, %v2 %2 = xor <2 x i32> %v1, <i32 -1, i32 -1> %3 = and <2 x i32> %2, %v3 @@ -603,7 +610,7 @@ define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { ; CHECK-LABEL: bsl1xi64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <1 x i64> %v1, %v2 %2 = xor <1 x i64> %v1, <i64 -1> %3 = and <1 x i64> %2, %v3 @@ -613,7 +620,7 @@ define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { ; CHECK-LABEL: bsl16xi8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <16 x i8> %v1, %v2 %2 = xor <16 x i8> 
%v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %3 = and <16 x i8> %2, %v3 @@ -623,7 +630,7 @@ define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { ; CHECK-LABEL: bsl8xi16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <8 x i16> %v1, %v2 %2 = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> %3 = and <8 x i16> %2, %v3 @@ -633,7 +640,7 @@ define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ; CHECK-LABEL: bsl4xi32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <4 x i32> %v1, %v2 %2 = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1> %3 = and <4 x i32> %2, %v3 @@ -643,25 +650,25 @@ define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { define <8 x i8> @vselect_v8i8(<8 x i8> %a) { ; CHECK-LABEL: vselect_v8i8: -; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff -; CHECK-NEXT: {{bsl v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b|and v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b}} +; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff +; CHECK-NEXT: {{bsl v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b|and v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b}} %b = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> <i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> ret <8 x i8> %b } define <4 x i16> @vselect_v4i16(<4 x i16> %a) { ; CHECK-LABEL: vselect_v4i16: -; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff -; CHECK-NEXT: {{bsl v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b|and v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b}} +; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff +; CHECK-NEXT: {{bsl v[0-9]+.8b, v[0-9]+.8b, 
v[0-9]+.8b|and v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b}} %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i16> %a, <4 x i16> <i16 undef, i16 0, i16 0, i16 0> ret <4 x i16> %b } define <8 x i8> @vselect_cmp_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: vselect_cmp_ne: -; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %cmp = icmp ne <8 x i8> %a, %b %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c ret <8 x i8> %d @@ -669,8 +676,8 @@ define <8 x i8> @vselect_cmp_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { define <8 x i8> @vselect_cmp_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: vselect_cmp_eq: -; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %cmp = icmp eq <8 x i8> %a, %b %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c ret <8 x i8> %d @@ -678,9 +685,9 @@ define <8 x i8> @vselect_cmp_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: vselect_cmpz_ne: -; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %cmp = icmp ne <8 x i8> %a, zeroinitializer %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c ret <8 x i8> %d @@ -688,8 +695,8 @@ 
define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { define <8 x i8> @vselect_cmpz_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: vselect_cmpz_eq: -; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 -; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %cmp = icmp eq <8 x i8> %a, zeroinitializer %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c ret <8 x i8> %d @@ -697,8 +704,8 @@ define <8 x i8> @vselect_cmpz_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: vselect_tst: -; CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = and <8 x i8> %a, %b %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer %d = select <8 x i1> %tmp4, <8 x i8> %b, <8 x i8> %c @@ -707,7 +714,7 @@ define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { ; CHECK-LABEL: bsl2xi64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <2 x i64> %v1, %v2 %2 = xor <2 x i64> %v1, <i64 -1, i64 -1> %3 = and <2 x i64> %2, %v3 @@ -717,84 +724,84 @@ define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) { ; CHECK-LABEL: orrimm8b_as_orrimm4h_lsl0: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} %val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <8 x i8> %val } define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) { ; CHECK-LABEL: orrimm8b_as_orimm4h_lsl8: 
-; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <8 x i8> %val } define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) { ; CHECK-LABEL: orimm16b_as_orrimm8h_lsl0: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} %val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <16 x i8> %val } define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) { ; CHECK-LABEL: orimm16b_as_orrimm8h_lsl8: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <16 x i8> %val } define <8 x i8> @and8imm2s_lsl0(<8 x i8> %a) { ; CHECK-LABEL: and8imm2s_lsl0: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255> ret <8 x i8> %tmp1 } define <8 x i8> @and8imm2s_lsl8(<8 x i8> %a) { ; CHECK-LABEL: and8imm2s_lsl8: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255> ret <8 x i8> %tmp1 } define <8 x i8> @and8imm2s_lsl16(<8 x i8> %a) { ; CHECK-LABEL: and8imm2s_lsl16: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255> ret <8 x i8> %tmp1 } define <8 x i8> @and8imm2s_lsl24(<8 x i8> %a) { ; CHECK-LABEL: and8imm2s_lsl24: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24 %tmp1 = and <8 x i8> %a, < i8 255, i8 
255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1> ret <8 x i8> %tmp1 } define <4 x i16> @and16imm2s_lsl0(<4 x i16> %a) { ; CHECK-LABEL: and16imm2s_lsl0: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = and <4 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535> ret <4 x i16> %tmp1 } define <4 x i16> @and16imm2s_lsl8(<4 x i16> %a) { ; CHECK-LABEL: and16imm2s_lsl8: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = and <4 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535> ret <4 x i16> %tmp1 } define <4 x i16> @and16imm2s_lsl16(<4 x i16> %a) { ; CHECK-LABEL: and16imm2s_lsl16: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = and <4 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280> ret <4 x i16> %tmp1 } define <4 x i16> @and16imm2s_lsl24(<4 x i16> %a) { ; CHECK-LABEL: and16imm2s_lsl24: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24 %tmp1 = and <4 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511> ret <4 x i16> %tmp1 } @@ -802,448 +809,448 @@ define <4 x i16> @and16imm2s_lsl24(<4 x i16> %a) { define <1 x i64> @and64imm2s_lsl0(<1 x i64> %a) { ; CHECK-LABEL: and64imm2s_lsl0: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = and <1 x i64> %a, < i64 -1095216660736> ret <1 x i64> %tmp1 } define <1 x i64> @and64imm2s_lsl8(<1 x i64> %a) { ; CHECK-LABEL: and64imm2s_lsl8: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = and <1 x i64> %a, < i64 -280375465148161> ret <1 x i64> %tmp1 } define <1 x i64> @and64imm2s_lsl16(<1 x i64> %a) { ; CHECK-LABEL: and64imm2s_lsl16: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = and <1 x i64> %a, < i64 -71776119077928961> ret <1 x 
i64> %tmp1 } define <1 x i64> @and64imm2s_lsl24(<1 x i64> %a) { ; CHECK-LABEL: and64imm2s_lsl24: -; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24 %tmp1 = and <1 x i64> %a, < i64 144115183814443007> ret <1 x i64> %tmp1 } define <16 x i8> @and8imm4s_lsl0(<16 x i8> %a) { ; CHECK-LABEL: and8imm4s_lsl0: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255> ret <16 x i8> %tmp1 } define <16 x i8> @and8imm4s_lsl8(<16 x i8> %a) { ; CHECK-LABEL: and8imm4s_lsl8: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = and <16 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255> ret <16 x i8> %tmp1 } define <16 x i8> @and8imm4s_lsl16(<16 x i8> %a) { ; CHECK-LABEL: and8imm4s_lsl16: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255> ret <16 x i8> %tmp1 } define <16 x i8> @and8imm4s_lsl24(<16 x i8> %a) { ; CHECK-LABEL: and8imm4s_lsl24: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24 %tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1> ret <16 x i8> %tmp1 } define <8 x i16> @and16imm4s_lsl0(<8 x i16> %a) { ; CHECK-LABEL: and16imm4s_lsl0: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = and <8 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535> ret <8 x i16> %tmp1 } define 
<8 x i16> @and16imm4s_lsl8(<8 x i16> %a) { ; CHECK-LABEL: and16imm4s_lsl8: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = and <8 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535> ret <8 x i16> %tmp1 } define <8 x i16> @and16imm4s_lsl16(<8 x i16> %a) { ; CHECK-LABEL: and16imm4s_lsl16: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = and <8 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280> ret <8 x i16> %tmp1 } define <8 x i16> @and16imm4s_lsl24(<8 x i16> %a) { ; CHECK-LABEL: and16imm4s_lsl24: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24 %tmp1 = and <8 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511> ret <8 x i16> %tmp1 } define <2 x i64> @and64imm4s_lsl0(<2 x i64> %a) { ; CHECK-LABEL: and64imm4s_lsl0: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = and <2 x i64> %a, < i64 -1095216660736, i64 -1095216660736> ret <2 x i64> %tmp1 } define <2 x i64> @and64imm4s_lsl8(<2 x i64> %a) { ; CHECK-LABEL: and64imm4s_lsl8: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = and <2 x i64> %a, < i64 -280375465148161, i64 -280375465148161> ret <2 x i64> %tmp1 } define <2 x i64> @and64imm4s_lsl16(<2 x i64> %a) { ; CHECK-LABEL: and64imm4s_lsl16: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = and <2 x i64> %a, < i64 -71776119077928961, i64 -71776119077928961> ret <2 x i64> %tmp1 } define <2 x i64> @and64imm4s_lsl24(<2 x i64> %a) { ; CHECK-LABEL: and64imm4s_lsl24: -; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24 +; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24 %tmp1 = and <2 x i64> 
%a, < i64 144115183814443007, i64 144115183814443007> ret <2 x i64> %tmp1 } define <8 x i8> @and8imm4h_lsl0(<8 x i8> %a) { ; CHECK-LABEL: and8imm4h_lsl0: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <8 x i8> %tmp1 } define <8 x i8> @and8imm4h_lsl8(<8 x i8> %a) { ; CHECK-LABEL: and8imm4h_lsl8: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <8 x i8> %tmp1 } define <2 x i32> @and16imm4h_lsl0(<2 x i32> %a) { ; CHECK-LABEL: and16imm4h_lsl0: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = and <2 x i32> %a, < i32 4278255360, i32 4278255360> ret <2 x i32> %tmp1 } define <2 x i32> @and16imm4h_lsl8(<2 x i32> %a) { ; CHECK-LABEL: and16imm4h_lsl8: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = and <2 x i32> %a, < i32 16711935, i32 16711935> ret <2 x i32> %tmp1 } define <1 x i64> @and64imm4h_lsl0(<1 x i64> %a) { ; CHECK-LABEL: and64imm4h_lsl0: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = and <1 x i64> %a, < i64 -71777214294589696> ret <1 x i64> %tmp1 } define <1 x i64> @and64imm4h_lsl8(<1 x i64> %a) { ; CHECK-LABEL: and64imm4h_lsl8: -; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = and <1 x i64> %a, < i64 71777214294589695> ret <1 x i64> %tmp1 } define <16 x i8> @and8imm8h_lsl0(<16 x i8> %a) { ; CHECK-LABEL: and8imm8h_lsl0: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255 > ret <16 x i8> %tmp1 } define <16 x i8> 
@and8imm8h_lsl8(<16 x i8> %a) { ; CHECK-LABEL: and8imm8h_lsl8: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = and <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0 > ret <16 x i8> %tmp1 } define <4 x i32> @and16imm8h_lsl0(<4 x i32> %a) { ; CHECK-LABEL: and16imm8h_lsl0: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = and <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360> ret <4 x i32> %tmp1 } define <4 x i32> @and16imm8h_lsl8(<4 x i32> %a) { ; CHECK-LABEL: and16imm8h_lsl8: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = and <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935> ret <4 x i32> %tmp1 } define <2 x i64> @and64imm8h_lsl0(<2 x i64> %a) { ; CHECK-LABEL: and64imm8h_lsl0: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = and <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696> ret <2 x i64> %tmp1 } define <2 x i64> @and64imm8h_lsl8(<2 x i64> %a) { ; CHECK-LABEL: and64imm8h_lsl8: -; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = and <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695> ret <2 x i64> %tmp1 } define <8 x i8> @orr8imm2s_lsl0(<8 x i8> %a) { ; CHECK-LABEL: orr8imm2s_lsl0: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0> ret <8 x i8> %tmp1 } define <8 x i8> @orr8imm2s_lsl8(<8 x i8> %a) { ; CHECK-LABEL: orr8imm2s_lsl8: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0> ret <8 x i8> %tmp1 
} define <8 x i8> @orr8imm2s_lsl16(<8 x i8> %a) { ; CHECK-LABEL: orr8imm2s_lsl16: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0> ret <8 x i8> %tmp1 } define <8 x i8> @orr8imm2s_lsl24(<8 x i8> %a) { ; CHECK-LABEL: orr8imm2s_lsl24: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 %tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255> ret <8 x i8> %tmp1 } define <4 x i16> @orr16imm2s_lsl0(<4 x i16> %a) { ; CHECK-LABEL: orr16imm2s_lsl0: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = or <4 x i16> %a, < i16 255, i16 0, i16 255, i16 0> ret <4 x i16> %tmp1 } define <4 x i16> @orr16imm2s_lsl8(<4 x i16> %a) { ; CHECK-LABEL: orr16imm2s_lsl8: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = or <4 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0> ret <4 x i16> %tmp1 } define <4 x i16> @orr16imm2s_lsl16(<4 x i16> %a) { ; CHECK-LABEL: orr16imm2s_lsl16: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = or <4 x i16> %a, < i16 0, i16 255, i16 0, i16 255> ret <4 x i16> %tmp1 } define <4 x i16> @orr16imm2s_lsl24(<4 x i16> %a) { ; CHECK-LABEL: orr16imm2s_lsl24: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 %tmp1 = or <4 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280> ret <4 x i16> %tmp1 } define <1 x i64> @orr64imm2s_lsl0(<1 x i64> %a) { ; CHECK-LABEL: orr64imm2s_lsl0: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}} %tmp1 = or <1 x i64> %a, < i64 1095216660735> ret <1 x i64> %tmp1 } define <1 x i64> @orr64imm2s_lsl8(<1 x i64> %a) { ; CHECK-LABEL: orr64imm2s_lsl8: -; CHECK: orr {{v[0-9]+}}.2s, 
#{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8 %tmp1 = or <1 x i64> %a, < i64 280375465148160> ret <1 x i64> %tmp1 } define <1 x i64> @orr64imm2s_lsl16(<1 x i64> %a) { ; CHECK-LABEL: orr64imm2s_lsl16: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16 %tmp1 = or <1 x i64> %a, < i64 71776119077928960> ret <1 x i64> %tmp1 } define <1 x i64> @orr64imm2s_lsl24(<1 x i64> %a) { ; CHECK-LABEL: orr64imm2s_lsl24: -; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24 %tmp1 = or <1 x i64> %a, < i64 -72057589759737856> ret <1 x i64> %tmp1 } define <16 x i8> @orr8imm4s_lsl0(<16 x i8> %a) { ; CHECK-LABEL: orr8imm4s_lsl0: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0> ret <16 x i8> %tmp1 } define <16 x i8> @orr8imm4s_lsl8(<16 x i8> %a) { ; CHECK-LABEL: orr8imm4s_lsl8: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0> ret <16 x i8> %tmp1 } define <16 x i8> @orr8imm4s_lsl16(<16 x i8> %a) { ; CHECK-LABEL: orr8imm4s_lsl16: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0> ret <16 x i8> %tmp1 } define <16 x i8> @orr8imm4s_lsl24(<16 x i8> %a) { ; CHECK-LABEL: orr8imm4s_lsl24: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 %tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255> ret <16 x 
i8> %tmp1 } define <8 x i16> @orr16imm4s_lsl0(<8 x i16> %a) { ; CHECK-LABEL: orr16imm4s_lsl0: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = or <8 x i16> %a, < i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0> ret <8 x i16> %tmp1 } define <8 x i16> @orr16imm4s_lsl8(<8 x i16> %a) { ; CHECK-LABEL: orr16imm4s_lsl8: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = or <8 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0> ret <8 x i16> %tmp1 } define <8 x i16> @orr16imm4s_lsl16(<8 x i16> %a) { ; CHECK-LABEL: orr16imm4s_lsl16: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = or <8 x i16> %a, < i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255> ret <8 x i16> %tmp1 } define <8 x i16> @orr16imm4s_lsl24(<8 x i16> %a) { ; CHECK-LABEL: orr16imm4s_lsl24: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 %tmp1 = or <8 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280> ret <8 x i16> %tmp1 } define <2 x i64> @orr64imm4s_lsl0(<2 x i64> %a) { ; CHECK-LABEL: orr64imm4s_lsl0: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}} %tmp1 = or <2 x i64> %a, < i64 1095216660735, i64 1095216660735> ret <2 x i64> %tmp1 } define <2 x i64> @orr64imm4s_lsl8(<2 x i64> %a) { ; CHECK-LABEL: orr64imm4s_lsl8: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8 %tmp1 = or <2 x i64> %a, < i64 280375465148160, i64 280375465148160> ret <2 x i64> %tmp1 } define <2 x i64> @orr64imm4s_lsl16(<2 x i64> %a) { ; CHECK-LABEL: orr64imm4s_lsl16: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16 %tmp1 = or <2 x i64> %a, < i64 71776119077928960, i64 
71776119077928960> ret <2 x i64> %tmp1 } define <2 x i64> @orr64imm4s_lsl24(<2 x i64> %a) { ; CHECK-LABEL: orr64imm4s_lsl24: -; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 +; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24 %tmp1 = or <2 x i64> %a, < i64 -72057589759737856, i64 -72057589759737856> ret <2 x i64> %tmp1 } define <8 x i8> @orr8imm4h_lsl0(<8 x i8> %a) { ; CHECK-LABEL: orr8imm4h_lsl0: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <8 x i8> %tmp1 } define <8 x i8> @orr8imm4h_lsl8(<8 x i8> %a) { ; CHECK-LABEL: orr8imm4h_lsl8: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <8 x i8> %tmp1 } define <2 x i32> @orr16imm4h_lsl0(<2 x i32> %a) { ; CHECK-LABEL: orr16imm4h_lsl0: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = or <2 x i32> %a, < i32 16711935, i32 16711935> ret <2 x i32> %tmp1 } define <2 x i32> @orr16imm4h_lsl8(<2 x i32> %a) { ; CHECK-LABEL: orr16imm4h_lsl8: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = or <2 x i32> %a, < i32 4278255360, i32 4278255360> ret <2 x i32> %tmp1 } define <1 x i64> @orr64imm4h_lsl0(<1 x i64> %a) { ; CHECK-LABEL: orr64imm4h_lsl0: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}} %tmp1 = or <1 x i64> %a, < i64 71777214294589695> ret <1 x i64> %tmp1 } define <1 x i64> @orr64imm4h_lsl8(<1 x i64> %a) { ; CHECK-LABEL: orr64imm4h_lsl8: -; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 %tmp1 = or <1 x i64> %a, < i64 -71777214294589696> ret <1 x i64> %tmp1 } define <16 x i8> @orr8imm8h_lsl0(<16 x i8> %a) { ; CHECK-LABEL: orr8imm8h_lsl0: -; CHECK: orr 
{{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <16 x i8> %tmp1 } define <16 x i8> @orr8imm8h_lsl8(<16 x i8> %a) { ; CHECK-LABEL: orr8imm8h_lsl8: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <16 x i8> %tmp1 } define <4 x i32> @orr16imm8h_lsl0(<4 x i32> %a) { ; CHECK-LABEL: orr16imm8h_lsl0: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = or <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935> ret <4 x i32> %tmp1 } define <4 x i32> @orr16imm8h_lsl8(<4 x i32> %a) { ; CHECK-LABEL: orr16imm8h_lsl8: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = or <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360> ret <4 x i32> %tmp1 } define <2 x i64> @orr64imm8h_lsl0(<2 x i64> %a) { ; CHECK-LABEL: orr64imm8h_lsl0: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}} %tmp1 = or <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695> ret <2 x i64> %tmp1 } define <2 x i64> @orr64imm8h_lsl8(<2 x i64> %a) { ; CHECK-LABEL: orr64imm8h_lsl8: -; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 %tmp1 = or <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696> ret <2 x i64> %tmp1 } diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index 887cb5dd698a..8bb7cc8c1430 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -1092,63 +1092,64 @@ define 
<2 x i64> @cmneqz2xi64(<2 x i64> %A) { define <8 x i8> @cmhsz8xi8(<8 x i8> %A) { ; CHECK-LABEL: cmhsz8xi8: -; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.8b, #{{0x2|2}} ; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = icmp uge <8 x i8> %A, zeroinitializer; + %tmp3 = icmp uge <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 } define <16 x i8> @cmhsz16xi8(<16 x i8> %A) { ; CHECK-LABEL: cmhsz16xi8: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.16b, #{{0x2|2}} ; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = icmp uge <16 x i8> %A, zeroinitializer; + %tmp3 = icmp uge <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 } define <4 x i16> @cmhsz4xi16(<4 x i16> %A) { ; CHECK-LABEL: cmhsz4xi16: -; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.4h, #{{0x2|2}} ; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = icmp uge <4 x i16> %A, zeroinitializer; + %tmp3 = icmp uge <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 } define <8 x i16> @cmhsz8xi16(<8 x i16> %A) { ; CHECK-LABEL: cmhsz8xi16: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.8h, #{{0x2|2}} ; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = icmp uge <8 x i16> %A, zeroinitializer; + %tmp3 = icmp uge <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } define <2 x i32> @cmhsz2xi32(<2 x i32> %A) { ; CHECK-LABEL: cmhsz2xi32: -; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.2s, #{{0x2|2}} ; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - 
%tmp3 = icmp uge <2 x i32> %A, zeroinitializer; + %tmp3 = icmp uge <2 x i32> %A, <i32 2, i32 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } define <4 x i32> @cmhsz4xi32(<4 x i32> %A) { ; CHECK-LABEL: cmhsz4xi32: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.4s, #{{0x2|2}} ; CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = icmp uge <4 x i32> %A, zeroinitializer; + %tmp3 = icmp uge <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @cmhsz2xi64(<2 x i64> %A) { ; CHECK-LABEL: cmhsz2xi64: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: orr w[[TWO:[0-9]+]], wzr, #0x2 +; CHECK-NEXT: {{v[0-9]+}}.2d, x[[TWO]] ; CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = icmp uge <2 x i64> %A, zeroinitializer; + %tmp3 = icmp uge <2 x i64> %A, <i64 2, i64 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -1156,63 +1157,64 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) { define <8 x i8> @cmhiz8xi8(<8 x i8> %A) { ; CHECK-LABEL: cmhiz8xi8: -; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.8b, #{{0x1|1}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer; + %tmp3 = icmp ugt <8 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 } define <16 x i8> @cmhiz16xi8(<16 x i8> %A) { ; CHECK-LABEL: cmhiz16xi8: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.16b, #{{0x1|1}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer; + %tmp3 = icmp ugt <16 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 } define <4 x i16> @cmhiz4xi16(<4 x i16> %A) { ; 
CHECK-LABEL: cmhiz4xi16: -; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.4h, #{{0x1|1}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer; + %tmp3 = icmp ugt <4 x i16> %A, <i16 1, i16 1, i16 1, i16 1> %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 } define <8 x i16> @cmhiz8xi16(<8 x i16> %A) { ; CHECK-LABEL: cmhiz8xi16: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.8h, #{{0x1|1}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer; + %tmp3 = icmp ugt <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } define <2 x i32> @cmhiz2xi32(<2 x i32> %A) { ; CHECK-LABEL: cmhiz2xi32: -; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.2s, #{{0x1|1}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer; + %tmp3 = icmp ugt <2 x i32> %A, <i32 1, i32 1> %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } define <4 x i32> @cmhiz4xi32(<4 x i32> %A) { ; CHECK-LABEL: cmhiz4xi32: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: movi {{v[0-9]+}}.4s, #{{0x1|1}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer; + %tmp3 = icmp ugt <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1> %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @cmhiz2xi64(<2 x i64> %A) { ; CHECK-LABEL: cmhiz2xi64: -; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}} +; CHECK: orr w[[ONE:[0-9]+]], wzr, #{{0x1|1}} +; CHECK-NEXT: dup {{v[0-9]+}}.2d, x[[ONE]] ; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer; + %tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1> %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> 
ret <2 x i64> %tmp4 } @@ -1298,9 +1300,9 @@ define <8 x i8> @cmloz8xi8(<8 x i8> %A) { ; CHECK-LABEL: cmloz8xi8: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}} +; CHECK: movi v1.8b, #{{0x2|2}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v1.8b, {{v[0-9]+}}.8b - %tmp3 = icmp ult <8 x i8> %A, zeroinitializer; + %tmp3 = icmp ult <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 } @@ -1309,9 +1311,9 @@ define <16 x i8> @cmloz16xi8(<16 x i8> %A) { ; CHECK-LABEL: cmloz16xi8: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}} +; CHECK: movi v1.16b, #{{0x2|2}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b - %tmp3 = icmp ult <16 x i8> %A, zeroinitializer; + %tmp3 = icmp ult <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 } @@ -1320,9 +1322,9 @@ define <4 x i16> @cmloz4xi16(<4 x i16> %A) { ; CHECK-LABEL: cmloz4xi16: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}} +; CHECK: movi v1.4h, #{{0x2|2}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h - %tmp3 = icmp ult <4 x i16> %A, zeroinitializer; + %tmp3 = icmp ult <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 } @@ -1331,9 +1333,9 @@ define <8 x i16> @cmloz8xi16(<8 x i16> %A) { ; CHECK-LABEL: cmloz8xi16: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. 
-; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}} +; CHECK: movi v1.8h, #{{0x2|2}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h - %tmp3 = icmp ult <8 x i16> %A, zeroinitializer; + %tmp3 = icmp ult <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -1342,9 +1344,9 @@ define <2 x i32> @cmloz2xi32(<2 x i32> %A) { ; CHECK-LABEL: cmloz2xi32: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}} +; CHECK: movi v1.2s, #{{0x2|2}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s - %tmp3 = icmp ult <2 x i32> %A, zeroinitializer; + %tmp3 = icmp ult <2 x i32> %A, <i32 2, i32 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } @@ -1353,9 +1355,9 @@ define <4 x i32> @cmloz4xi32(<4 x i32> %A) { ; CHECK-LABEL: cmloz4xi32: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. -; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}} +; CHECK: movi v1.4s, #{{0x2|2}} ; CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s - %tmp3 = icmp ult <4 x i32> %A, zeroinitializer; + %tmp3 = icmp ult <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2> %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -1364,9 +1366,10 @@ define <2 x i64> @cmloz2xi64(<2 x i64> %A) { ; CHECK-LABEL: cmloz2xi64: ; Using registers other than v0, v1 are possible, but would be odd. ; LO implemented as HI, so check reversed operands. 
-; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}} +; CHECK: orr w[[TWO:[0-9]+]], wzr, #{{0x2|2}} +; CHECK-NEXT: dup v1.2d, x[[TWO]] ; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d - %tmp3 = icmp ult <2 x i64> %A, zeroinitializer; + %tmp3 = icmp ult <2 x i64> %A, <i64 2, i64 2> %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } diff --git a/test/CodeGen/AArch64/neon-dot-product.ll b/test/CodeGen/AArch64/neon-dot-product.ll new file mode 100644 index 000000000000..5ad75a7d24c5 --- /dev/null +++ b/test/CodeGen/AArch64/neon-dot-product.ll @@ -0,0 +1,126 @@ +; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s + +declare <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) +declare <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) +declare <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) +declare <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) + +define <2 x i32> @test_vdot_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 { +entry: +; CHECK-LABEL: test_vdot_u32: +; CHECK: udot v0.2s, v1.8b, v2.8b + %vdot1.i = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #2 + ret <2 x i32> %vdot1.i +} + +define <4 x i32> @test_vdotq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 { +entry: +; CHECK-LABEL: test_vdotq_u32: +; CHECK: udot v0.4s, v1.16b, v2.16b + %vdot1.i = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #2 + ret <4 x i32> %vdot1.i +} + +define <2 x i32> @test_vdot_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 { +entry: +; CHECK-LABEL: test_vdot_s32: +; CHECK: sdot v0.2s, v1.8b, v2.8b + %vdot1.i = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #2 + ret <2 x i32> %vdot1.i +} + +define <4 x i32> @test_vdotq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 { +entry: +; CHECK-LABEL: test_vdotq_s32: +; CHECK: 
sdot v0.4s, v1.16b, v2.16b + %vdot1.i = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #2 + ret <4 x i32> %vdot1.i +} + +define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) { +entry: +; CHECK-LABEL: test_vdot_lane_u32: +; CHECK: udot v0.2s, v1.8b, v2.4b[1] + %.cast = bitcast <8 x i8> %c to <2 x i32> + %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %.cast5 = bitcast <2 x i32> %shuffle to <8 x i8> + %vdot1.i = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %.cast5) #2 + ret <2 x i32> %vdot1.i +} + +define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) { +entry: +; CHECK-LABEL: test_vdotq_lane_u32: +; CHECK: udot v0.4s, v1.16b, v2.4b[1] + %.cast = bitcast <8 x i8> %c to <2 x i32> + %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %.cast3 = bitcast <4 x i32> %shuffle to <16 x i8> + %vdot1.i = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %.cast3) #2 + ret <4 x i32> %vdot1.i +} + +define <2 x i32> @test_vdot_laneq_u32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) { +entry: +; CHECK-LABEL: test_vdot_laneq_u32: +; CHECK: udot v0.2s, v1.8b, v2.4b[1] + %.cast = bitcast <16 x i8> %c to <4 x i32> + %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <2 x i32> <i32 1, i32 1> + %.cast5 = bitcast <2 x i32> %shuffle to <8 x i8> + %vdot1.i = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %.cast5) #2 + ret <2 x i32> %vdot1.i +} + +define <4 x i32> @test_vdotq_laneq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) { +entry: +; CHECK-LABEL: test_vdotq_laneq_u32: +; CHECK: udot v0.4s, v1.16b, v2.4b[1] + %.cast = bitcast <16 x i8> %c to <4 x i32> + %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %.cast3 = bitcast <4 x i32> 
%shuffle to <16 x i8> + %vdot1.i = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %.cast3) #2 + ret <4 x i32> %vdot1.i +} + +define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) { +entry: +; CHECK-LABEL: test_vdot_lane_s32: +; CHECK: sdot v0.2s, v1.8b, v2.4b[1] + %.cast = bitcast <8 x i8> %c to <2 x i32> + %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %.cast5 = bitcast <2 x i32> %shuffle to <8 x i8> + %vdot1.i = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %.cast5) #2 + ret <2 x i32> %vdot1.i +} + +define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) { +entry: +; CHECK-LABEL: test_vdotq_lane_s32: +; CHECK: sdot v0.4s, v1.16b, v2.4b[1] + %.cast = bitcast <8 x i8> %c to <2 x i32> + %shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %.cast3 = bitcast <4 x i32> %shuffle to <16 x i8> + %vdot1.i = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %.cast3) #2 + ret <4 x i32> %vdot1.i +} + +define <2 x i32> @test_vdot_laneq_s32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) { +entry: +; CHECK-LABEL: test_vdot_laneq_s32: +; CHECK: sdot v0.2s, v1.8b, v2.4b[1] + %.cast = bitcast <16 x i8> %c to <4 x i32> + %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <2 x i32> <i32 1, i32 1> + %.cast5 = bitcast <2 x i32> %shuffle to <8 x i8> + %vdot1.i = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %.cast5) #2 + ret <2 x i32> %vdot1.i +} + +define <4 x i32> @test_vdotq_laneq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) { +entry: +; CHECK-LABEL: test_vdotq_laneq_s32: +; CHECK: sdot v0.4s, v1.16b, v2.4b[1] + %.cast = bitcast <16 x i8> %c to <4 x i32> + %shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %.cast3 = bitcast <4 x i32> 
%shuffle to <16 x i8> + %vdot1.i = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %.cast3) #2 + ret <4 x i32> %vdot1.i +} diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll index f270b54abb46..c159da1e9d18 100644 --- a/test/CodeGen/AArch64/neon-extract.ll +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -26,6 +26,7 @@ entry: define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: test_vext_s64: +; CHECK-NOT: ext {{v[0-9]+}} entry: %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0> ret <1 x i64> %vext @@ -136,6 +137,7 @@ entry: define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vext_f64: +; CHECK-NOT: ext {{v[0-9]+}} entry: %vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> <i32 0> ret <1 x double> %vext diff --git a/test/CodeGen/AArch64/neon-fma-FMF.ll b/test/CodeGen/AArch64/neon-fma-FMF.ll index 25beef6592b2..893d153801a7 100644 --- a/test/CodeGen/AArch64/neon-fma-FMF.ll +++ b/test/CodeGen/AArch64/neon-fma-FMF.ll @@ -1,13 +1,23 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -; CHECK-LABEL: fma: +define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: fma_1: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = fmul contract <2 x float> %A, %B; %tmp2 = fadd contract <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } +; This case will fold as it was only available through unsafe before, now available from +; the contract on the fadd +define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: fma_2: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = fmul <2 x float> %A, %B; + %tmp2 = fadd contract <2 x float> %C, %tmp1; + ret <2 x float> %tmp2 +} + define <2 x float> 
@no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; CHECK-LABEL: no_fma_1: ; CHECK: fmul @@ -17,19 +27,20 @@ define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ret <2 x float> %tmp2 } -define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -; CHECK-LABEL: no_fma_2: -; CHECK: fmul -; CHECK: fadd - %tmp1 = fmul <2 x float> %A, %B; - %tmp2 = fadd contract <2 x float> %C, %tmp1; +define <2 x float> @fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: fma_sub_1: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1 = fmul contract <2 x float> %A, %B; + %tmp2 = fsub contract <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } -define <2 x float> @fma_sub(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -; CHECK-LABEL: fma_sub: +; This case will fold as it was only available through unsafe before, now available from +; the contract on the fsub +define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; CHECK-LABEL: fma_sub_2: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp1 = fmul contract <2 x float> %A, %B; + %tmp1 = fmul <2 x float> %A, %B; %tmp2 = fsub contract <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } @@ -42,12 +53,3 @@ define <2 x float> @no_fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) %tmp2 = fsub <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } - -define <2 x float> @no_fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -; CHECK-LABEL: no_fma_sub_2: -; CHECK: fmul -; CHECK: fsub - %tmp1 = fmul <2 x float> %A, %B; - %tmp2 = fsub contract <2 x float> %C, %tmp1; - ret <2 x float> %tmp2 -} diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll index de402c4780be..cc470078aa56 100644 --- a/test/CodeGen/AArch64/neon-idiv.ll +++ b/test/CodeGen/AArch64/neon-idiv.ll @@ -3,11 +3,13 @@ define <4 x i32> @test1(<4 x i32> %a) { %rem = srem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7> ret 
<4 x i32> %rem -; CHECK-LABEL: test1 -; FIXME: Can we lower this more efficiently? -; CHECK: mul -; CHECK: mul -; CHECK: mul -; CHECK: mul +; For C constant X/C is simplified to X-X/C*C. The X/C division is lowered +; to MULHS due the simplification by multiplying by a magic number +; (TargetLowering::BuildSDIV). +; CHECK-LABEL: test1: +; CHECK: smull2 [[SMULL2:(v[0-9]+)]].2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: smull [[SMULL:(v[0-9]+)]].2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: uzp2 [[UZP2:(v[0-9]+).4s]], [[SMULL]].4s, [[SMULL2]].4s +; CHECK: add [[ADD:(v[0-9]+.4s)]], [[UZP2]], v0.4s +; CHECK: sshr [[SSHR:(v[0-9]+.4s)]], [[ADD]], #2 } - diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index 2384e485fd73..4a45484d17a9 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -79,7 +79,7 @@ define half @test_dup_hv8H_0(<8 x half> %v) #0 { define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) #0 { ; CHECK-LABEL: test_vector_dup_bv16B: - ; CHECK-NEXT: dup v0.16b, v0.b[14] + ; CHECK-NEXT: dup v0.8b, v0.b[14] ; CHECK-NEXT: ret %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> ret <1 x i8> %shuffle.i @@ -95,7 +95,7 @@ define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) #0 { define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) #0 { ; CHECK-LABEL: test_vector_dup_hv8H: - ; CHECK-NEXT: dup v0.8h, v0.h[7] + ; CHECK-NEXT: dup v0.4h, v0.h[7] ; CHECK-NEXT: ret %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> ret <1 x i16> %shuffle.i @@ -111,7 +111,7 @@ define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) #0 { define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) #0 { ; CHECK-LABEL: test_vector_dup_sv4S: - ; CHECK-NEXT: dup v0.4s, v0.s[3] + ; CHECK-NEXT: dup v0.2s, v0.s[3] ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> ret <1 x i32> %shuffle @@ -135,7 +135,7 @@ define <1 x 
i64> @test_vector_dup_dv2D(<2 x i64> %v1) #0 { define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) #0 { ; CHECK-LABEL: test_vector_copy_dup_dv2D: - ; CHECK-NEXT: dup v0.2d, v1.d[1] + ; CHECK-NEXT: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 ; CHECK-NEXT: ret %vget_lane = extractelement <2 x i64> %c, i32 1 %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0 diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll index 0d5ebb324ecb..2f2f54fd5a5d 100644 --- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll +++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -20,6 +20,16 @@ define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) { ret void } +define void @truncStore.v4i8(<4 x i32> %a, <4 x i8>* %result) { +; CHECK-LABEL: truncStore.v4i8: +; CHECK: xtn [[TMP:(v[0-9]+)]].4h, v{{[0-9]+}}.4s +; CHECK-NEXT: xtn [[TMP2:(v[0-9]+)]].8b, [[TMP]].8h +; CHECK-NEXT: str s{{[0-9]+}}, [x{{[0-9]+}}] + %b = trunc <4 x i32> %a to <4 x i8> + store <4 x i8> %b, <4 x i8>* %result + ret void +} + define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) { ; CHECK-LABEL: truncStore.v8i16: ; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h diff --git a/test/CodeGen/AArch64/no-quad-ldp-stp.ll b/test/CodeGen/AArch64/no-quad-ldp-stp.ll index fb030d291362..32f57cd21e87 100644 --- a/test/CodeGen/AArch64/no-quad-ldp-stp.ll +++ b/test/CodeGen/AArch64/no-quad-ldp-stp.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+slow-paired-128 -verify-machineinstrs -asm-verbose=false | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-eabi -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+slow-paired-128 -verify-machineinstrs -asm-verbose=false | FileCheck %s --check-prefixes=CHECK,SLOW +; RUN: llc < %s -mtriple=aarch64-eabi -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s 
--check-prefixes=CHECK,SLOW +; RUN: llc < %s -mtriple=aarch64-eabi -mcpu=exynos-m3 -verify-machineinstrs -asm-verbose=false | FileCheck %s --check-prefixes=CHECK,FAST ; CHECK-LABEL: test_nopair_st -; CHECK: str -; CHECK: stur -; CHECK-NOT: stp +; SLOW: str +; SLOW: stur +; SLOW-NOT: stp +; FAST: stp define void @test_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) { %tmp1 = bitcast double* %ptr to <2 x double>* store <2 x double> %v2, <2 x double>* %tmp1, align 16 @@ -15,9 +17,10 @@ define void @test_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) { } ; CHECK-LABEL: test_nopair_ld -; CHECK: ldr -; CHECK: ldr -; CHECK-NOT: ldp +; SLOW: ldr +; SLOW: ldr +; SLOW-NOT: ldp +; FAST: ldp define <2 x i64> @test_nopair_ld(i64* %p) { %a1 = bitcast i64* %p to <2 x i64>* %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8 diff --git a/test/CodeGen/AArch64/no-stack-arg-probe.ll b/test/CodeGen/AArch64/no-stack-arg-probe.ll new file mode 100644 index 000000000000..0a4097f4d388 --- /dev/null +++ b/test/CodeGen/AArch64/no-stack-arg-probe.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \ +; RUN: | FileCheck %s + +define void @check_watermark() "no-stack-arg-probe" { +entry: + %buffer = alloca [4096 x i8], align 1 + ret void +} + +; CHECK: check_watermark: +; CHECK: sub sp, sp, #1, lsl #12 +; CHECK-NOT: bl __chkstk diff --git a/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir b/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir new file mode 100644 index 000000000000..093473ab472b --- /dev/null +++ b/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir @@ -0,0 +1,16 @@ +# RUN: not llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o /dev/null 2>&1 | FileCheck %s + +# Check we don't infinitely loop on cycles in copy bundles. 
+# CHECK: error: register rewriting failed: cycle in copy bundle + +--- +name: func0 +body: | + bb.0: + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q1_q2_q3 = COPY $q0_q1_q2 { + $q2_q3_q4 = COPY $q1_q2_q3 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 +... diff --git a/test/CodeGen/AArch64/overlapping-copy-bundle.mir b/test/CodeGen/AArch64/overlapping-copy-bundle.mir new file mode 100644 index 000000000000..84f39ea93985 --- /dev/null +++ b/test/CodeGen/AArch64/overlapping-copy-bundle.mir @@ -0,0 +1,80 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o - | FileCheck %s +--- +name: func0 +body: | + bb.0: + ; Make sure we don't clobber q3 when we expand the bundle + ; CHECK-LABEL: name: func0 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF + ; CHECK: $q4 = COPY $q3 + ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2 + ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0 + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q1_q2_q3 = COPY $q0_q1_q2 { + $q4 = COPY $q3 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 + +... +--- +name: func1 +body: | + bb.0: + ; If it was already ordered, make sure we don't break it + ; CHECK-LABEL: name: func1 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF + ; CHECK: $q4 = COPY $q3 + ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2 + ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0 + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q4 = COPY $q3 { + $q1_q2_q3 = COPY $q0_q1_q2 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 + +... 
+--- +name: func2 +body: | + bb.0: + ; A bit less realistic, but check that we handle multiple nodes + ; CHECK-LABEL: name: func2 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF + ; CHECK: $q3 = COPY $q2 + ; CHECK: $q4 = COPY $q1 + ; CHECK: $q1_q2 = COPY $q0_q1 + ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0 + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q1_q2 = COPY $q0_q1 { + $q3 = COPY $q2 + $q4 = COPY $q1 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 + +... +--- +name: func3 +body: | + bb.0: + ; If there was nothing wrong, don't change the order for no reason + ; CHECK-LABEL: name: func3 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q1_q2_q3_q4 = IMPLICIT_DEF + ; CHECK: $q0_q1 = COPY $q1_q2 + ; CHECK: $q2_q3 = COPY $q3_q4 + ; CHECK: ST4i64 $q0_q1_q2_q3, 0, $x0 + $x0 = IMPLICIT_DEF + $q1_q2_q3_q4 = IMPLICIT_DEF + $q0_q1 = COPY $q1_q2 { + $q2_q3 = COPY $q3_q4 + } + ST4i64 $q0_q1_q2_q3, 0, $x0 + +... diff --git a/test/CodeGen/AArch64/phi-dbg.ll b/test/CodeGen/AArch64/phi-dbg.ll index 80bc885afa5c..a2c97f311080 100644 --- a/test/CodeGen/AArch64/phi-dbg.ll +++ b/test/CodeGen/AArch64/phi-dbg.ll @@ -30,7 +30,7 @@ define i32 @func(i32) #0 !dbg !8 { ; CHECK: ldr w[[REG:[0-9]+]], [sp, #8] ; CHECK-NEXT: .Ltmp call void @llvm.dbg.value(metadata i32 %.0, i64 0, metadata !15, metadata !13), !dbg !16 -; CHECK-NEXT: //DEBUG_VALUE: func:c <- %w[[REG]] +; CHECK-NEXT: //DEBUG_VALUE: func:c <- $w[[REG]] %5 = add nsw i32 %.0, %0, !dbg !22 call void @llvm.dbg.value(metadata i32 %5, i64 0, metadata !15, metadata !13), !dbg !16 ret i32 %5, !dbg !23 @@ -57,7 +57,7 @@ attributes #1 = { nounwind readnone } !5 = !{i32 1, !"wchar_size", i32 4} !6 = !{i32 1, !"min_enum_size", i32 4} !7 = !{!"clang"} -!8 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!8 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !9, 
isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) !9 = !DISubroutineType(types: !10) !10 = !{!11, !11} !11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) diff --git a/test/CodeGen/AArch64/post-ra-machine-sink.mir b/test/CodeGen/AArch64/post-ra-machine-sink.mir new file mode 100644 index 000000000000..ca9e9f6c1c69 --- /dev/null +++ b/test/CodeGen/AArch64/post-ra-machine-sink.mir @@ -0,0 +1,400 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s + +--- +# Sink w19 to %bb.1. +# CHECK-LABEL: name: sinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK-NOT: $w19 = COPY killed $w0 +# CHECK-LABEL: bb.1: +# CHECK: liveins: $w1, $w0 +# CHECK: renamable $w19 = COPY killed $w0 + +name: sinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY killed $w0 + Bcc 11, %bb.1, implicit $nzcv + B %bb.2 + + bb.1: + liveins: $w1, $w19 + $w0 = ADDWrr $w1, $w19 + RET $x0 + + bb.2: + $w0 = COPY $wzr + RET $x0 +... + +--- +# Sink w19 to %bb.2. +# CHECK-LABEL: name: sinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY killed $w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: $w1, $w0 +# CHECK: renamable $w19 = COPY killed $w0 +name: sinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY killed $w0 + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + $w0 = COPY $wzr + RET $x0 + + bb.2: + liveins: $w1, $w19 + $w0 = ADDWrr $w1, $w19 + RET $x0 +... + +--- +# Sink w19 and w20 to %bb.1. 
+# CHECK-LABEL: name: sinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY killed $w0 +# CHECK-LABEL: bb.1: +# CHECK: liveins: $w1, $w0 +# CHECK: renamable $w19 = COPY killed $w0 +# CHECK: renamable $w20 = COPY killed $w1 +name: sinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY killed $w0 + renamable $w20 = COPY killed $w1 + + bb.1: + liveins: $w19, $w20 + $w0 = COPY $w19 + $w1 = COPY $w20 + RET $x0 +... + + +# Sink w19 to %bb.1 and w20 to %bb.2. +# CHECK-LABEL: name: sinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY killed $w0 +# CHECK-NOT: renamable $w20 = COPY killed $w1 +# CHECK-LABEL: bb.1: +# CHECK: liveins: $w1, $w0 +# CHECK: renamable $w19 = COPY killed $w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: $w0, $w1 +# CHECK: renamable $w20 = COPY killed $w1 +name: sinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY killed $w0 + renamable $w20 = COPY killed $w1 + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + liveins: $w1, $w19 + $w0 = ADDWrr $w1, $w19 + RET $x0 + + bb.2: + liveins: $w0, $w20 + $w0 = ADDWrr $w0, $w20 + RET $x0 +... + +# Sink w19 to %bb.3 through %bb.2. +# CHECK-LABEL: name: sinkcopy5 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY $w0 +# CHECK-LABEL: bb.2: +# CHECK: $w1 = ADDWrr $w1, $w0 +# CHECK-LABEL: bb.3: +# CHECK: liveins: $w1, $w0 +# CHECK: renamable $w19 = COPY killed $w0 +name: sinkcopy5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + Bcc 11, %bb.2, implicit $nzcv + + bb.1: + liveins: $x0 + $w19 = COPY $wzr + RET $x0 + + bb.2: + liveins: $w0, $w1, $w19 + $w1 = ADDWrr $w1, killed $w0 + + bb.3: + liveins: $w1, $w19 + $w0 = ADDWrr $w1, $w19 + RET $x0 +... + +# Sink w19 to %bb.3, but through %bb.2. 
+# CHECK-LABEL: name: sinkcopy6 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY $w0 +# CHECK-NOT: renamable $w20 = COPY $w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: $w1, $w0 +# CHECK: renamable $w19 = COPY $w0 +# CHECK: renamable $w20 = COPY $w19 +name: sinkcopy6 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + renamable $w20 = COPY $w19 + Bcc 11, %bb.2, implicit $nzcv + + bb.1: + $w0 = COPY $wzr + RET $x0 + + bb.2: + liveins: $w1, $w20 + $w0 = ADDWrr killed $w1, $w20 + RET $x0 +... + +--- +# Sink w19 regardless of the def of wzr in bb.0. +# CHECK-LABEL: name: sinkcopy7 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY $w0 +# CHECK-LABEL: bb.2: +# CHECK: renamable $w19 = COPY $wzr +name: sinkcopy7 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + renamable $w19 = COPY $wzr + $wzr = SUBSWri $w1, 1, 0, implicit-def $nzcv + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + $x0 = COPY $xzr + RET $x0 + + bb.2: + liveins: $w0, $w19 + $w0 = ADDWrr $w0, $w19 + RET $x0 +... + +--- +# Sink w19 to %bb.3 through %bb.2. +# CHECK-LABEL: name: sinkcopy8 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable $w19 = COPY $w0, implicit-def $x19 +# CHECK-LABEL: bb.2: +# CHECK: $w1 = ADDWrr $w1, $w0, implicit $x0 +# CHECK-LABEL: bb.3: +# CHECK: liveins: $x1, $w0 +# CHECK: renamable $w19 = COPY killed $w0, implicit-def $x19 +name: sinkcopy8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $x1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0, implicit-def $x19 + Bcc 11, %bb.2, implicit $nzcv + + bb.1: + liveins: $x0 + $w19 = COPY $wzr + RET $x0 + + bb.2: + liveins: $w0, $x1, $x19 + $w1 = ADDWrr $w1, $w0, implicit killed $x0 + + bb.3: + liveins: $x1, $x19 + $x0 = ADDXrr $x1, $x19 + RET $x0 +... + +--- +# Don't sink w19 as w0 is defined in bb.0. 
+# CHECK-LABEL: name: donotsinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK: renamable $w19 = COPY $w0 +# CHECK: $w0 = LDRWui $sp, 0 +name: donotsinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + $w0 = LDRWui $sp, 0 :: (load 4) + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + $x0 = COPY $xzr + RET $x0 + + bb.2: + liveins: $w0, $w19 + $w0 = ADDWrr $w0, $w19 + RET $x0 +... + +--- +# Don't sink w19 as w19 is used in bb.0. +# CHECK-LABEL: name: donotsinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK: renamable $w19 = COPY $w0 +# CHECK: STRWui $w1, $x19, 0 +name: donotsinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + STRWui $w1, $x19, 0 :: (store 4) + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + $x0 = COPY $xzr + RET $x0 + + bb.2: + liveins: $w0, $w19 + $w0 = ADDWrr $w0, $w19 + RET $x0 +... + +--- +# Don't sink w19 as w19 is used in both %bb.1 and %bb.2. +# CHECK-LABEL: name: donotsinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK: renamable $w19 = COPY $w0 +name: donotsinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + liveins: $w19 + $w0 = COPY $w19 + RET $x0 + + bb.2: + liveins: $w0, $w19 + $w0 = ADDWrr $w0, $w19 + RET $x0 +... + +--- +# Don't sink w19 as %bb.2 has multiple predecessors. +# CHECK-LABEL: name: donotsinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK: renamable $w19 = COPY $w0 +name: donotsinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + liveins: $w0 + $w19 = COPY $w0 + B %bb.2 + + bb.2: + liveins: $w0, $w19 + $w0 = ADDWrr $w0, $w19 + RET $x0 +... + + +# Don't sink w19 after sinking w20. 
+# CHECK-LABEL: name: donotsinkcopy5 +# CHECK-LABEL: bb.0: +# CHECK: renamable $w19 = COPY $w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: $w0, $w19 +# CHECK: renamable $w20 = COPY $w19 +name: donotsinkcopy5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $w19 = COPY $w0 + renamable $w20 = COPY $w19 + Bcc 11, %bb.2, implicit $nzcv + + bb.1: + liveins: $w19 + $w0 = COPY $w19 + RET $x0 + + bb.2: + liveins: $w0, $w20 + $w0 = ADDWrr killed $w0, $w20 + RET $x0 +... + +--- +# Don't sink w19 as x19 is live-in in %bb.2. +# CHECK-LABEL: name: donotsinkcopy6 +# CHECK-LABEL: bb.0: +name: donotsinkcopy6 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $w1 + $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv + renamable $x19 = COPY $x0 + Bcc 11, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + liveins: $w19 + $w0 = COPY $w19 + RET $x0 + + bb.2: + liveins: $x0, $x19 + $x0 = ADDXrr $x0, $x19 + RET $x0 +... diff --git a/test/CodeGen/AArch64/pr33172.ll b/test/CodeGen/AArch64/pr33172.ll index 1e1da78b28ff..098d5358b02d 100644 --- a/test/CodeGen/AArch64/pr33172.ll +++ b/test/CodeGen/AArch64/pr33172.ll @@ -21,12 +21,12 @@ entry: %wide.load8291059.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 18) to i64*), align 8 store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8 store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8 - tail call void @llvm.memset.p0i8.i64(i8* bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i32 8, i1 false) #2 + tail call void @llvm.memset.p0i8.i64(i8* align 8 bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i1 false) #2 unreachable } ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, 
i64, i32, i1) #1 +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind } diff --git a/test/CodeGen/AArch64/preferred-alignment.ll b/test/CodeGen/AArch64/preferred-alignment.ll index c032e83d268f..b39a5e8703d7 100644 --- a/test/CodeGen/AArch64/preferred-alignment.ll +++ b/test/CodeGen/AArch64/preferred-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64 -O0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -O0 -fast-isel < %s | FileCheck %s ; Function Attrs: nounwind define i32 @foo() #0 { diff --git a/test/CodeGen/AArch64/preferred-function-alignment.ll b/test/CodeGen/AArch64/preferred-function-alignment.ll index 4a0a5821edf7..0ac565e7d545 100644 --- a/test/CodeGen/AArch64/preferred-function-alignment.ll +++ b/test/CodeGen/AArch64/preferred-function-alignment.ll @@ -14,7 +14,7 @@ ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a73 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=exynos-m1 < %s | FileCheck ---check-prefixes=ALIGN4,CHECK %s ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=exynos-m2 < %s | FileCheck ---check-prefixes=ALIGN4,CHECK %s -; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 < %s | FileCheck ---check-prefixes=ALIGN4,CHECK %s +; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 < %s | FileCheck ---check-prefixes=ALIGN5,CHECK %s define void @test() { ret void @@ -24,6 +24,7 @@ define void @test() { ; ALIGN2: .p2align 2 ; ALIGN3: .p2align 3 ; ALIGN4: .p2align 4 +; ALIGN5: .p2align 5 define void @test_optsize() optsize { ret void diff --git a/test/CodeGen/AArch64/reg-scavenge-frame.mir b/test/CodeGen/AArch64/reg-scavenge-frame.mir index f79e75e248b9..dad83ac97f46 100644 --- a/test/CodeGen/AArch64/reg-scavenge-frame.mir +++ b/test/CodeGen/AArch64/reg-scavenge-frame.mir @@ -12,75 +12,75 @@ stack: - { id: 0, type: spill-slot, offset: 0, size: 32, alignment: 8 } body: | bb.0: - liveins: 
%d16_d17_d18_d19 - %x0 = COPY %xzr - %x1 = COPY %xzr - %x2 = COPY %xzr - %x3 = COPY %xzr - %x4 = COPY %xzr - %x5 = COPY %xzr - %x6 = COPY %xzr - %x7 = COPY %xzr - %x8 = COPY %xzr - %x9 = COPY %xzr - %x10 = COPY %xzr - %x11 = COPY %xzr - %x12 = COPY %xzr - %x13 = COPY %xzr - %x14 = COPY %xzr - %x15 = COPY %xzr - %x16 = COPY %xzr - %x17 = COPY %xzr - %x18 = COPY %xzr - %x19 = COPY %xzr - %x20 = COPY %xzr - %x21 = COPY %xzr - %x22 = COPY %xzr - %x23 = COPY %xzr - %x24 = COPY %xzr - %x25 = COPY %xzr - %x26 = COPY %xzr - %x27 = COPY %xzr - %x28 = COPY %xzr - %fp = COPY %xzr - %lr = COPY %xzr - ST1Fourv1d killed %d16_d17_d18_d19, %stack.0 :: (store 32 into %stack.0, align 8) - ; CHECK: STRXui killed %[[SCAVREG:x[0-9]+|fp|lr]], %sp, [[SPOFFSET:[0-9]+]] :: (store 8 into %stack.1) - ; CHECK-NEXT: %[[SCAVREG]] = ADDXri %sp, {{[0-9]+}}, 0 - ; CHECK-NEXT: ST1Fourv1d killed %d16_d17_d18_d19, killed %[[SCAVREG]] :: (store 32 into %stack.0, align 8) - ; CHECK-NEXT: %[[SCAVREG]] = LDRXui %sp, [[SPOFFSET]] :: (load 8 from %stack.1) + liveins: $d16_d17_d18_d19 + $x0 = COPY $xzr + $x1 = COPY $xzr + $x2 = COPY $xzr + $x3 = COPY $xzr + $x4 = COPY $xzr + $x5 = COPY $xzr + $x6 = COPY $xzr + $x7 = COPY $xzr + $x8 = COPY $xzr + $x9 = COPY $xzr + $x10 = COPY $xzr + $x11 = COPY $xzr + $x12 = COPY $xzr + $x13 = COPY $xzr + $x14 = COPY $xzr + $x15 = COPY $xzr + $x16 = COPY $xzr + $x17 = COPY $xzr + $x18 = COPY $xzr + $x19 = COPY $xzr + $x20 = COPY $xzr + $x21 = COPY $xzr + $x22 = COPY $xzr + $x23 = COPY $xzr + $x24 = COPY $xzr + $x25 = COPY $xzr + $x26 = COPY $xzr + $x27 = COPY $xzr + $x28 = COPY $xzr + $fp = COPY $xzr + $lr = COPY $xzr + ST1Fourv1d killed $d16_d17_d18_d19, %stack.0 :: (store 32 into %stack.0, align 8) + ; CHECK: STRXui killed $[[SCAVREG:x[0-9]+|fp|lr]], $sp, [[SPOFFSET:[0-9]+]] :: (store 8 into %stack.1) + ; CHECK-NEXT: $[[SCAVREG]] = ADDXri $sp, {{[0-9]+}}, 0 + ; CHECK-NEXT: ST1Fourv1d killed $d16_d17_d18_d19, killed $[[SCAVREG]] :: (store 32 into %stack.0, align 8) + ; 
CHECK-NEXT: $[[SCAVREG]] = LDRXui $sp, [[SPOFFSET]] :: (load 8 from %stack.1) - HINT 0, implicit %x0 - HINT 0, implicit %x1 - HINT 0, implicit %x2 - HINT 0, implicit %x3 - HINT 0, implicit %x4 - HINT 0, implicit %x5 - HINT 0, implicit %x6 - HINT 0, implicit %x7 - HINT 0, implicit %x8 - HINT 0, implicit %x9 - HINT 0, implicit %x10 - HINT 0, implicit %x11 - HINT 0, implicit %x12 - HINT 0, implicit %x13 - HINT 0, implicit %x14 - HINT 0, implicit %x15 - HINT 0, implicit %x16 - HINT 0, implicit %x17 - HINT 0, implicit %x18 - HINT 0, implicit %x19 - HINT 0, implicit %x20 - HINT 0, implicit %x21 - HINT 0, implicit %x22 - HINT 0, implicit %x23 - HINT 0, implicit %x24 - HINT 0, implicit %x25 - HINT 0, implicit %x26 - HINT 0, implicit %x27 - HINT 0, implicit %x28 - HINT 0, implicit %fp - HINT 0, implicit %lr + HINT 0, implicit $x0 + HINT 0, implicit $x1 + HINT 0, implicit $x2 + HINT 0, implicit $x3 + HINT 0, implicit $x4 + HINT 0, implicit $x5 + HINT 0, implicit $x6 + HINT 0, implicit $x7 + HINT 0, implicit $x8 + HINT 0, implicit $x9 + HINT 0, implicit $x10 + HINT 0, implicit $x11 + HINT 0, implicit $x12 + HINT 0, implicit $x13 + HINT 0, implicit $x14 + HINT 0, implicit $x15 + HINT 0, implicit $x16 + HINT 0, implicit $x17 + HINT 0, implicit $x18 + HINT 0, implicit $x19 + HINT 0, implicit $x20 + HINT 0, implicit $x21 + HINT 0, implicit $x22 + HINT 0, implicit $x23 + HINT 0, implicit $x24 + HINT 0, implicit $x25 + HINT 0, implicit $x26 + HINT 0, implicit $x27 + HINT 0, implicit $x28 + HINT 0, implicit $fp + HINT 0, implicit $lr RET_ReallyLR ... diff --git a/test/CodeGen/AArch64/regcoal-physreg.mir b/test/CodeGen/AArch64/regcoal-physreg.mir index 095e8a4973ce..1341b9551663 100644 --- a/test/CodeGen/AArch64/regcoal-physreg.mir +++ b/test/CodeGen/AArch64/regcoal-physreg.mir @@ -13,79 +13,79 @@ name: func0 body: | bb.0: ; We usually should not coalesce copies from allocatable physregs. 
- ; CHECK: %0:gpr32 = COPY %w7 - ; CHECK: STRWui %0, %x1, 0 - %0 : gpr32 = COPY %w7 - STRWui %0, %x1, 0 + ; CHECK: %0:gpr32 = COPY $w7 + ; CHECK: STRWui %0, $x1, 0 + %0 : gpr32 = COPY $w7 + STRWui %0, $x1, 0 ; It is fine to coalesce copies from reserved physregs ; CHECK-NOT: COPY - ; CHECK: STRXui %fp, %x1, 0 - %1 : gpr64 = COPY %fp - STRXui %1, %x1, 0 + ; CHECK: STRXui $fp, $x1, 0 + %1 : gpr64 = COPY $fp + STRXui %1, $x1, 0 ; It is not fine to coalesce copies from reserved physregs when they are ; clobbered. - ; CHECK: %2:gpr64 = COPY %fp - ; CHECK: STRXui %2, %x1, 0 - %2 : gpr64 = COPY %fp - %fp = SUBXri %fp, 4, 0 - STRXui %2, %x1, 0 + ; CHECK: %2:gpr64 = COPY $fp + ; CHECK: STRXui %2, $x1, 0 + %2 : gpr64 = COPY $fp + $fp = SUBXri $fp, 4, 0 + STRXui %2, $x1, 0 ; Is is fine to coalesce copies from constant physregs even when they are ; clobbered. ; CHECK-NOT: COPY - ; CHECK: STRWui %wzr, %x1 - %3 : gpr32 = COPY %wzr - dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv - STRWui %3, %x1, 0 + ; CHECK: STRWui $wzr, $x1 + %3 : gpr32 = COPY $wzr + dead $wzr = SUBSWri $w1, 0, 0, implicit-def $nzcv + STRWui %3, $x1, 0 ; Is is fine to coalesce copies from constant physregs even when they are ; clobbered. ; CHECK-NOT: COPY - ; CHECK: STRXui %xzr, %x1 - %4 : gpr64 = COPY %xzr - dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv - STRXui %4, %x1, 0 + ; CHECK: STRXui $xzr, $x1 + %4 : gpr64 = COPY $xzr + dead $wzr = SUBSWri $w1, 0, 0, implicit-def $nzcv + STRXui %4, $x1, 0 ; Coalescing COPYs into constant physregs. - ; CHECK: %wzr = SUBSWri %w1, 0, 0 - %5 : gpr32 = SUBSWri %w1, 0, 0, implicit-def %nzcv - %wzr = COPY %5 + ; CHECK: $wzr = SUBSWri $w1, 0, 0 + %5 : gpr32 = SUBSWri $w1, 0, 0, implicit-def $nzcv + $wzr = COPY %5 ; Only coalesce when the source register is reserved as a whole (this is ; a limitation of the current code which cannot update liveness information ; of the non-reserved part). 
- ; CHECK: %6:xseqpairsclass = COPY %x28_fp + ; CHECK: %6:xseqpairsclass = COPY $x28_fp ; CHECK: HINT 0, implicit %6 - %6 : xseqpairsclass = COPY %x28_fp + %6 : xseqpairsclass = COPY $x28_fp HINT 0, implicit %6 ; It is not fine to coalesce copies from reserved physregs when they are ; clobbered by the regmask on a call. - ; CHECK: %7:gpr64 = COPY %x18 - ; CHECK: BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp - ; CHECK: STRXui %7, %x1, 0 + ; CHECK: %7:gpr64 = COPY $x18 + ; CHECK: BL @f2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK: STRXui %7, $x1, 0 ; Need a def of x18 so that it's not deduced as "constant". - %x18 = COPY %xzr - %7 : gpr64 = COPY %x18 - BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp - STRXui %7, %x1, 0 + $x18 = COPY $xzr + %7 : gpr64 = COPY $x18 + BL @f2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + STRXui %7, $x1, 0 ; This can be coalesced. - ; CHECK: %fp = SUBXri %fp, 4, 0 - %8 : gpr64sp = SUBXri %fp, 4, 0 - %fp = COPY %8 + ; CHECK: $fp = SUBXri $fp, 4, 0 + %8 : gpr64sp = SUBXri $fp, 4, 0 + $fp = COPY %8 ; Cannot coalesce when there are reads of the physreg. - ; CHECK-NOT: %fp = SUBXri %fp, 8, 0 - ; CHECK: %9:gpr64sp = SUBXri %fp, 8, 0 - ; CHECK: STRXui %fp, %fp, 0 - ; CHECK: %fp = COPY %9 - %9 : gpr64sp = SUBXri %fp, 8, 0 - STRXui %fp, %fp, 0 - %fp = COPY %9 + ; CHECK-NOT: $fp = SUBXri $fp, 8, 0 + ; CHECK: %9:gpr64sp = SUBXri $fp, 8, 0 + ; CHECK: STRXui $fp, $fp, 0 + ; CHECK: $fp = COPY %9 + %9 : gpr64sp = SUBXri $fp, 8, 0 + STRXui $fp, $fp, 0 + $fp = COPY %9 ... --- # Check coalescing of COPYs from reserved physregs. 
@@ -95,20 +95,20 @@ body: | bb.0: ; Cannot coalesce physreg because we have reads on other CFG paths (we ; currently abort for any control flow) - ; CHECK-NOT: %fp = SUBXri - ; CHECK: %0:gpr64sp = SUBXri %fp, 12, 0 - ; CHECK: CBZX undef %x0, %bb.1 + ; CHECK-NOT: $fp = SUBXri + ; CHECK: %0:gpr64sp = SUBXri $fp, 12, 0 + ; CHECK: CBZX undef $x0, %bb.1 ; CHECK: B %bb.2 - %0 : gpr64sp = SUBXri %fp, 12, 0 - CBZX undef %x0, %bb.1 + %0 : gpr64sp = SUBXri $fp, 12, 0 + CBZX undef $x0, %bb.1 B %bb.2 bb.1: - %fp = COPY %0 + $fp = COPY %0 RET_ReallyLR bb.2: - STRXui %fp, %fp, 0 + STRXui $fp, $fp, 0 RET_ReallyLR ... --- @@ -118,16 +118,16 @@ body: | bb.0: ; We can coalesce copies from physreg to vreg across multiple blocks. ; CHECK-NOT: COPY - ; CHECK: CBZX undef %x0, %bb.1 + ; CHECK: CBZX undef $x0, %bb.1 ; CHECK-NEXT: B %bb.2 - %0 : gpr64sp = COPY %fp - CBZX undef %x0, %bb.1 + %0 : gpr64sp = COPY $fp + CBZX undef $x0, %bb.1 B %bb.2 bb.1: - ; CHECK: STRXui undef %x0, %fp, 0 + ; CHECK: STRXui undef $x0, $fp, 0 ; CHECK-NEXT: RET_ReallyLR - STRXui undef %x0, %0, 0 + STRXui undef $x0, %0, 0 RET_ReallyLR bb.2: diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll index 9524154c3e0d..ed1d415067e8 100644 --- a/test/CodeGen/AArch64/remat.ll +++ b/test/CodeGen/AArch64/remat.ll @@ -8,6 +8,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m1 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m2 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m3 -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m4 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=falkor -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=saphira -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s diff --git a/test/CodeGen/AArch64/reverse-csr-restore-seq.mir b/test/CodeGen/AArch64/reverse-csr-restore-seq.mir new file mode 100644 
index 000000000000..2b168b31a51b --- /dev/null +++ b/test/CodeGen/AArch64/reverse-csr-restore-seq.mir @@ -0,0 +1,106 @@ +# RUN: llc -run-pass=prologepilog -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK,BEFORELDSTOPT +# RUN: llc -start-before=prologepilog -stop-after=aarch64-ldst-opt -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK,AFTERLDSTOPT +# +--- | + + define void @foo() nounwind { entry: unreachable } + + define void @bar() nounwind { entry: unreachable } + + define void @baz() nounwind { entry: unreachable } + +... +--- +name: foo +# CHECK-LABEL: name: foo +tracksRegLiveness: true +body: | + bb.0: + $x19 = IMPLICIT_DEF + $x20 = IMPLICIT_DEF + $x21 = IMPLICIT_DEF + $x22 = IMPLICIT_DEF + $x23 = IMPLICIT_DEF + $x24 = IMPLICIT_DEF + $x25 = IMPLICIT_DEF + $x26 = IMPLICIT_DEF + + ; The local stack size is 0, so the last ldp in the sequence will also + ; restore the stack. + ; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 2 + ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 4 + ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 6 + + ; Before running the load-store optimizer, we emit a ldp and an add. + ; BEFORELDSTOPT-NEXT: $x26, $x25 = frame-destroy LDPXi $sp, 0 + ; BEFORELDSTOPT-NEXT: $sp = frame-destroy ADDXri $sp, 64, 0 + + ; We want to make sure that after running the load-store optimizer, the ldp + ; and the add get merged into a post-index ldp. + ; AFTERLDSTOPT-NEXT: early-clobber $sp, $x26, $x25 = frame-destroy LDPXpost $sp, 8 + + RET_ReallyLR +... 
+--- +name: bar +# CHECK-LABEL: name: bar +tracksRegLiveness: true +stack: + - { id : 0, size: 8, alignment: 4, + stack-id: 0, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + +body: | + bb.0: + $x19 = IMPLICIT_DEF + $x20 = IMPLICIT_DEF + $x21 = IMPLICIT_DEF + $x22 = IMPLICIT_DEF + $x23 = IMPLICIT_DEF + $x24 = IMPLICIT_DEF + $x25 = IMPLICIT_DEF + $x26 = IMPLICIT_DEF + + ; The local stack size is not 0, and we can combine the CSR stack size with + ; the local stack size. This results in rewriting the offsets for all the + ; save/restores and forbids us to merge the stack adjustment and the last pop. + ; In this case, there is no point of moving the first CSR pair at the end. + ; CHECK: $x26, $x25 = frame-destroy LDPXi $sp, 2 + ; CHECK-NEXT: $x24, $x23 = frame-destroy LDPXi $sp, 4 + ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 6 + ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 8 + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0 + RET_ReallyLR +... +--- +# Check that the load from the offset 0 is moved at the end even when hasFP is +# false. +name: baz +# CHECK-LABEL: name: baz +alignment: 2 +tracksRegLiveness: true +frameInfo: + adjustsStack: true + hasCalls: true +body: | + bb.0: + successors: %bb.1 + + $x0 = IMPLICIT_DEF + $x20 = IMPLICIT_DEF + $x21 = IMPLICIT_DEF + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.1 + + bb.1: + ; CHECK: $x20, $lr = frame-destroy LDPXi $sp, 2 + ; BEFORELDSTOPT-NEXT: $x21 = frame-destroy LDRXui $sp, 0 + ; BEFORELDSTOPT-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0 + + ; AFTERLDSTOPT-NEXT: early-clobber $sp, $x21 = frame-destroy LDRXpost $sp, 32 + RET_ReallyLR +... 
diff --git a/test/CodeGen/AArch64/rotate-extract.ll b/test/CodeGen/AArch64/rotate-extract.ll new file mode 100644 index 000000000000..4f5313ca4a77 --- /dev/null +++ b/test/CodeGen/AArch64/rotate-extract.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +; Check that under certain conditions we can factor out a rotate +; from the following idioms: +; (a*c0) >> s1 | (a*c1) +; (a/c0) << s1 | (a/c1) +; This targets cases where instcombine has folded a shl/srl/mul/udiv +; with one of the shifts from the rotate idiom + +define i64 @ror_extract_shl(i64 %i) nounwind { +; CHECK-LABEL: ror_extract_shl: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl x8, x0, #10 +; CHECK-NEXT: bfxil x8, x0, #54, #7 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret + %lhs_mul = shl i64 %i, 3 + %rhs_mul = shl i64 %i, 10 + %lhs_shift = lshr i64 %lhs_mul, 57 + %out = or i64 %lhs_shift, %rhs_mul + ret i64 %out +} + +define i32 @ror_extract_shrl(i32 %i) nounwind { +; CHECK-LABEL: ror_extract_shrl: +; CHECK: // %bb.0: +; CHECK-NEXT: ror w8, w0, #7 +; CHECK-NEXT: and w0, w8, #0xf1ffffff +; CHECK-NEXT: ret + %lhs_div = lshr i32 %i, 7 + %rhs_div = lshr i32 %i, 3 + %rhs_shift = shl i32 %rhs_div, 28 + %out = or i32 %lhs_div, %rhs_shift + ret i32 %out +} + +define i32 @ror_extract_mul(i32 %i) nounwind { +; CHECK-LABEL: ror_extract_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w0, lsl #3 +; CHECK-NEXT: ror w0, w8, #25 +; CHECK-NEXT: ret + %lhs_mul = mul i32 %i, 9 + %rhs_mul = mul i32 %i, 1152 + %lhs_shift = lshr i32 %lhs_mul, 25 + %out = or i32 %lhs_shift, %rhs_mul + ret i32 %out +} + +define i64 @ror_extract_udiv(i64 %i) nounwind { +; CHECK-LABEL: ror_extract_udiv: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: movk x8, #43691 +; CHECK-NEXT: umulh x8, x0, x8 +; CHECK-NEXT: ror x8, x8, #5 +; CHECK-NEXT: and x0, x8, #0xf7ffffffffffffff +; CHECK-NEXT: ret + %lhs_div = 
udiv i64 %i, 3 + %rhs_div = udiv i64 %i, 48 + %lhs_shift = shl i64 %lhs_div, 60 + %out = or i64 %lhs_shift, %rhs_div + ret i64 %out +} + +define i64 @ror_extract_mul_with_mask(i64 %i) nounwind { +; CHECK-LABEL: ror_extract_mul_with_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w0, lsl #3 +; CHECK-NEXT: lsl w8, w8, #7 +; CHECK-NEXT: add x9, x0, x0, lsl #3 +; CHECK-NEXT: and x0, x8, #0x80 +; CHECK-NEXT: bfxil x0, x9, #57, #7 +; CHECK-NEXT: ret + %lhs_mul = mul i64 %i, 1152 + %rhs_mul = mul i64 %i, 9 + %lhs_and = and i64 %lhs_mul, 160 + %rhs_shift = lshr i64 %rhs_mul, 57 + %out = or i64 %lhs_and, %rhs_shift + ret i64 %out +} + +; Result would undershift +define i64 @no_extract_shl(i64 %i) nounwind { +; CHECK-LABEL: no_extract_shl: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl x8, x0, #10 +; CHECK-NEXT: bfxil x8, x0, #52, #7 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret + %lhs_mul = shl i64 %i, 5 + %rhs_mul = shl i64 %i, 10 + %lhs_shift = lshr i64 %lhs_mul, 57 + %out = or i64 %lhs_shift, %rhs_mul + ret i64 %out +} + +; Result would overshift +define i32 @no_extract_shrl(i32 %i) nounwind { +; CHECK-LABEL: no_extract_shrl: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #3 +; CHECK-NEXT: lsr w0, w0, #9 +; CHECK-NEXT: bfi w0, w8, #28, #4 +; CHECK-NEXT: ret + %lhs_div = lshr i32 %i, 3 + %rhs_div = lshr i32 %i, 9 + %lhs_shift = shl i32 %lhs_div, 28 + %out = or i32 %lhs_shift, %rhs_div + ret i32 %out +} + +; Can factor 128 from 2304, but result is 18 instead of 9 +define i64 @no_extract_mul(i64 %i) nounwind { +; CHECK-LABEL: no_extract_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, x0, lsl #3 +; CHECK-NEXT: lsr x0, x8, #57 +; CHECK-NEXT: bfi x0, x8, #8, #56 +; CHECK-NEXT: ret + %lhs_mul = mul i64 %i, 2304 + %rhs_mul = mul i64 %i, 9 + %rhs_shift = lshr i64 %rhs_mul, 57 + %out = or i64 %lhs_mul, %rhs_shift + ret i64 %out +} + +; Can't evenly factor 16 from 49 +define i32 @no_extract_udiv(i32 %i) nounwind { +; CHECK-LABEL: no_extract_udiv: +; CHECK: // %bb.0: +; CHECK-NEXT: 
mov w8, #43691 +; CHECK-NEXT: mov w9, #33437 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: movk w9, #21399, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: umull x9, w0, w9 +; CHECK-NEXT: lsr x8, x8, #33 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: extr w0, w8, w9, #4 +; CHECK-NEXT: ret + %lhs_div = udiv i32 %i, 3 + %rhs_div = udiv i32 %i, 49 + %lhs_shift = shl i32 %lhs_div, 28 + %out = or i32 %lhs_shift, %rhs_div + ret i32 %out +} diff --git a/test/CodeGen/AArch64/scheduledag-constreg.mir b/test/CodeGen/AArch64/scheduledag-constreg.mir index 013f59f52a9c..65ec43407413 100644 --- a/test/CodeGen/AArch64/scheduledag-constreg.mir +++ b/test/CodeGen/AArch64/scheduledag-constreg.mir @@ -7,23 +7,23 @@ # Check that the instructions are not dependent on each other, even though # they all read/write to the zero register. # CHECK-LABEL: MI Scheduling -# CHECK: SU(0): dead %wzr = SUBSWri %w1, 0, 0, implicit-def dead %nzcv +# CHECK: SU(0): dead $wzr = SUBSWri $w1, 0, 0, implicit-def dead $nzcv # CHECK: # succs left : 0 # CHECK-NOT: Successors: -# CHECK: SU(1): %w2 = COPY %wzr +# CHECK: SU(1): $w2 = COPY $wzr # CHECK: # succs left : 0 # CHECK-NOT: Successors: -# CHECK: SU(2): dead %wzr = SUBSWri %w3, 0, 0, implicit-def dead %nzcv +# CHECK: SU(2): dead $wzr = SUBSWri $w3, 0, 0, implicit-def dead $nzcv # CHECK: # succs left : 0 # CHECK-NOT: Successors: -# CHECK: SU(3): %w4 = COPY %wzr +# CHECK: SU(3): $w4 = COPY $wzr # CHECK: # succs left : 0 # CHECK-NOT: Successors: name: func body: | bb.0: - dead %wzr = SUBSWri %w1, 0, 0, implicit-def dead %nzcv - %w2 = COPY %wzr - dead %wzr = SUBSWri %w3, 0, 0, implicit-def dead %nzcv - %w4 = COPY %wzr + dead $wzr = SUBSWri $w1, 0, 0, implicit-def dead $nzcv + $w2 = COPY $wzr + dead $wzr = SUBSWri $w3, 0, 0, implicit-def dead $nzcv + $w4 = COPY $wzr ... 
diff --git a/test/CodeGen/AArch64/sdivpow2.ll b/test/CodeGen/AArch64/sdivpow2.ll index 6c02ea9a467f..dd1c21b75b0c 100644 --- a/test/CodeGen/AArch64/sdivpow2.ll +++ b/test/CodeGen/AArch64/sdivpow2.ll @@ -1,73 +1,88 @@ -; RUN: llc -mtriple=arm64-linux-gnu -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ISEL +; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FAST define i32 @test1(i32 %x) { -; CHECK-LABEL: test1 -; CHECK: add w8, w0, #7 -; CHECK: cmp w0, #0 -; CHECK: csel w8, w8, w0, lt -; CHECK: asr w0, w8, #3 +; CHECK-LABEL: test1: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #7 // =7 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr w0, w8, #3 +; CHECK-NEXT: ret %div = sdiv i32 %x, 8 ret i32 %div } define i32 @test2(i32 %x) { -; CHECK-LABEL: test2 -; CHECK: add w8, w0, #7 -; CHECK: cmp w0, #0 -; CHECK: csel w8, w8, w0, lt -; CHECK: neg w0, w8, asr #3 +; CHECK-LABEL: test2: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #7 // =7 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: neg w0, w8, asr #3 +; CHECK-NEXT: ret %div = sdiv i32 %x, -8 ret i32 %div } define i32 @test3(i32 %x) { -; CHECK-LABEL: test3 -; CHECK: add w8, w0, #31 -; CHECK: cmp w0, #0 -; CHECK: csel w8, w8, w0, lt -; CHECK: asr w0, w8, #5 +; CHECK-LABEL: test3: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #31 // =31 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr w0, w8, #5 +; CHECK-NEXT: ret %div = sdiv i32 %x, 32 ret i32 %div } define i64 @test4(i64 %x) { -; CHECK-LABEL: test4 -; CHECK: add x8, x0, #7 -; CHECK: cmp x0, #0 -; CHECK: csel 
x8, x8, x0, lt -; CHECK: asr x0, x8, #3 +; CHECK-LABEL: test4: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #7 // =7 +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: csel x8, x8, x0, lt +; CHECK-NEXT: asr x0, x8, #3 +; CHECK-NEXT: ret %div = sdiv i64 %x, 8 ret i64 %div } define i64 @test5(i64 %x) { -; CHECK-LABEL: test5 -; CHECK: add x8, x0, #7 -; CHECK: cmp x0, #0 -; CHECK: csel x8, x8, x0, lt -; CHECK: neg x0, x8, asr #3 +; CHECK-LABEL: test5: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #7 // =7 +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: csel x8, x8, x0, lt +; CHECK-NEXT: neg x0, x8, asr #3 +; CHECK-NEXT: ret %div = sdiv i64 %x, -8 ret i64 %div } define i64 @test6(i64 %x) { -; CHECK-LABEL: test6 -; CHECK: add x8, x0, #63 -; CHECK: cmp x0, #0 -; CHECK: csel x8, x8, x0, lt -; CHECK: asr x0, x8, #6 +; CHECK-LABEL: test6: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #63 // =63 +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: csel x8, x8, x0, lt +; CHECK-NEXT: asr x0, x8, #6 +; CHECK-NEXT: ret %div = sdiv i64 %x, 64 ret i64 %div } define i64 @test7(i64 %x) { -; CHECK-LABEL: test7 -; CHECK: orr [[REG:x[0-9]+]], xzr, #0xffffffffffff -; CHECK: add x8, x0, [[REG]] -; CHECK: cmp x0, #0 -; CHECK: csel x8, x8, x0, lt -; CHECK: asr x0, x8, #48 +; CHECK-LABEL: test7: +; CHECK: // %bb.0: +; CHECK-NEXT: orr x8, xzr, #0xffffffffffff +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: csel x8, x8, x0, lt +; CHECK-NEXT: asr x0, x8, #48 +; CHECK-NEXT: ret %div = sdiv i64 %x, 281474976710656 ret i64 %div } diff --git a/test/CodeGen/AArch64/shadow-call-stack.ll b/test/CodeGen/AArch64/shadow-call-stack.ll new file mode 100644 index 000000000000..dbd44fd3cd17 --- /dev/null +++ b/test/CodeGen/AArch64/shadow-call-stack.ll @@ -0,0 +1,47 @@ +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+reserve-x18 | FileCheck %s + +define void @f1() shadowcallstack { + ; CHECK: f1: + ; CHECK-NOT: x18 + ; CHECK: ret + ret void +} + +declare void 
@foo() + +define void @f2() shadowcallstack { + ; CHECK: f2: + ; CHECK-NOT: x18 + ; CHECK: b foo + tail call void @foo() + ret void +} + +declare i32 @bar() + +define i32 @f3() shadowcallstack { + ; CHECK: f3: + ; CHECK: str x30, [x18], #8 + ; CHECK: str x30, [sp, #-16]! + %res = call i32 @bar() + %res1 = add i32 %res, 1 + ; CHECK: ldr x30, [sp], #16 + ; CHECK: ldr x30, [x18, #-8]! + ; CHECK: ret + ret i32 %res +} + +define i32 @f4() shadowcallstack { + ; CHECK: f4: + %res1 = call i32 @bar() + %res2 = call i32 @bar() + %res3 = call i32 @bar() + %res4 = call i32 @bar() + %res12 = add i32 %res1, %res2 + %res34 = add i32 %res3, %res4 + %res1234 = add i32 %res12, %res34 + ; CHECK: ldp {{.*}}x30, [sp + ; CHECK: ldr x30, [x18, #-8]! + ; CHECK: ret + ret i32 %res1234 +} diff --git a/test/CodeGen/AArch64/shift-mod.ll b/test/CodeGen/AArch64/shift-mod.ll new file mode 100644 index 000000000000..fdf1fa49ba3d --- /dev/null +++ b/test/CodeGen/AArch64/shift-mod.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; Check that we optimize out AND instructions and ADD/SUB instructions +; modulo the shift size to take advantage of the implicit mod done on +; the shift amount value by the variable shift/rotate instructions. + +define i32 @test1(i32 %x, i64 %y) { +; CHECK-LABEL: test1: +; CHECK-NOT: and +; CHECK: lsr + %sh_prom = trunc i64 %y to i32 + %shr = lshr i32 %x, %sh_prom + ret i32 %shr +} + +define i64 @test2(i32 %x, i64 %y) { +; CHECK-LABEL: test2: +; CHECK-NOT: orr +; CHECK-NOT: sub +; CHECK: neg +; CHECK: asr + %sub9 = sub nsw i32 64, %x + %sh_prom12.i = zext i32 %sub9 to i64 + %shr.i = ashr i64 %y, %sh_prom12.i + ret i64 %shr.i +} + +define i64 @test3(i64 %x, i64 %y) { +; CHECK-LABEL: test3: +; CHECK-NOT: add +; CHECK: lsl + %add = add nsw i64 64, %x + %shl = shl i64 %y, %add + ret i64 %shl +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/shrink-wrapping-vla.ll b/test/CodeGen/AArch64/shrink-wrapping-vla.ll new file mode 100644 index 000000000000..07672584362a --- /dev/null +++ b/test/CodeGen/AArch64/shrink-wrapping-vla.ll @@ -0,0 +1,95 @@ +; Test shrink wrapping placement is correct with respect to calls to llvm.{stacksave,stackrestore} + +; void f(int n, int x[]) { +; if (n < 0) +; return; +; +; int a[n]; +; +; for (int i = 0; i < n; i++) +; a[i] = x[n - i - 1]; +; +; for (int i = 0; i < n; i++) +; x[i] = a[i] + 1; +; } +; +; RUN: llc -mtriple aarch64-linux %s -o - | FileCheck %s + +define dso_local void @f(i32 %n, i32* nocapture %x) { +entry: + %cmp = icmp slt i32 %n, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %0 = zext i32 %n to i64 + %1 = tail call i8* @llvm.stacksave() + %vla = alloca i32, i64 %0, align 16 + %cmp132 = icmp eq i32 %n, 0 + br i1 %cmp132, label %for.cond.cleanup8, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %if.end + %sub = add i32 %n, -1 + br label %for.body + +for.cond6.preheader: ; preds = %for.body + %cmp730 = icmp sgt i32 %n, 0 + br i1 %cmp730, label %for.body9, label %for.cond.cleanup8 + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv34 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next35, %for.body ] + %2 = trunc i64 %indvars.iv34 to i32 + %sub2 = sub i32 %sub, %2 + %idxprom = sext i32 %sub2 to i64 + %arrayidx = getelementptr inbounds i32, i32* %x, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv34 + store i32 %3, i32* %arrayidx4, align 4 + %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1 + %exitcond37 = icmp eq i64 %indvars.iv.next35, %0 + br i1 %exitcond37, label %for.cond6.preheader, label %for.body + +for.cond.cleanup8: ; preds = %for.body9, %if.end, %for.cond6.preheader + tail call void @llvm.stackrestore(i8* %1) + br label %return + +for.body9: ; preds = 
%for.cond6.preheader, %for.body9 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9 ], [ 0, %for.cond6.preheader ] + %arrayidx11 = getelementptr inbounds i32, i32* %vla, i64 %indvars.iv + %4 = load i32, i32* %arrayidx11, align 4 + %add = add nsw i32 %4, 1 + %arrayidx13 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv + store i32 %add, i32* %arrayidx13, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %0 + br i1 %exitcond, label %for.cond.cleanup8, label %for.body9 + +return: ; preds = %entry, %for.cond.cleanup8 + ret void +} + +; Function Attrs: nounwind +declare i8* @llvm.stacksave() + +; Function Attrs: nounwind +declare void @llvm.stackrestore(i8*) + +; Check that llvm.stackrestore() happens before CSRs are popped off the stack + +; CHECK-LABEL: f + +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK-NEXT: mov x29, sp + +; VLA allocation +; CHECK: add [[X1:x[0-9]+]], [[X1]], #15 +; CHECK: mov [[X2:x[0-9]+]], sp +; CHECK: and [[X1]], [[X1]], #0x7fffffff0 +; Saving the SP via llvm.stacksave() +; CHECK: mov [[SAVE:x[0-9]+]], sp +; CHECK: sub [[X2]], [[X2]], [[X1]] + +; The next instruction comes from llvm.stackrestore() +; CHECK: mov sp, [[SAVE]] +; Epilogue +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 diff --git a/test/CodeGen/AArch64/signbit-shift.ll b/test/CodeGen/AArch64/signbit-shift.ll new file mode 100644 index 000000000000..b554ce15872c --- /dev/null +++ b/test/CodeGen/AArch64/signbit-shift.ll @@ -0,0 +1,268 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +; If positive... 
+ +define i32 @zext_ifpos(i32 %x) { +; CHECK-LABEL: zext_ifpos: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %c = icmp sgt i32 %x, -1 + %e = zext i1 %c to i32 + ret i32 %e +} + +define i32 @add_zext_ifpos(i32 %x) { +; CHECK-LABEL: add_zext_ifpos: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #42 // =42 +; CHECK-NEXT: ret + %c = icmp sgt i32 %x, -1 + %e = zext i1 %c to i32 + %r = add i32 %e, 41 + ret i32 %r +} + +define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) { +; CHECK-LABEL: add_zext_ifpos_vec_splat: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v1.4s, #41 +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %e = zext <4 x i1> %c to <4 x i32> + %r = add <4 x i32> %e, <i32 41, i32 41, i32 41, i32 41> + ret <4 x i32> %r +} + +define i32 @sel_ifpos_tval_bigger(i32 %x) { +; CHECK-LABEL: sel_ifpos_tval_bigger: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: mov w8, #41 +; CHECK-NEXT: cinc w0, w8, ge +; CHECK-NEXT: ret + %c = icmp sgt i32 %x, -1 + %r = select i1 %c, i32 42, i32 41 + ret i32 %r +} + +define i32 @sext_ifpos(i32 %x) { +; CHECK-LABEL: sext_ifpos: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: asr w0, w8, #31 +; CHECK-NEXT: ret + %c = icmp sgt i32 %x, -1 + %e = sext i1 %c to i32 + ret i32 %e +} + +define i32 @add_sext_ifpos(i32 %x) { +; CHECK-LABEL: add_sext_ifpos: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #41 // =41 +; CHECK-NEXT: ret + %c = icmp sgt i32 %x, -1 + %e = sext i1 %c to i32 + %r = add i32 %e, 42 + ret i32 %r +} + +define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) { +; CHECK-LABEL: add_sext_ifpos_vec_splat: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: 
movi v1.4s, #42 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %e = sext <4 x i1> %c to <4 x i32> + %r = add <4 x i32> %e, <i32 42, i32 42, i32 42, i32 42> + ret <4 x i32> %r +} + +define i32 @sel_ifpos_fval_bigger(i32 %x) { +; CHECK-LABEL: sel_ifpos_fval_bigger: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: mov w8, #41 +; CHECK-NEXT: cinc w0, w8, lt +; CHECK-NEXT: ret + %c = icmp sgt i32 %x, -1 + %r = select i1 %c, i32 41, i32 42 + ret i32 %r +} + +; If negative... + +define i32 @zext_ifneg(i32 %x) { +; CHECK-LABEL: zext_ifneg: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ret + %c = icmp slt i32 %x, 0 + %r = zext i1 %c to i32 + ret i32 %r +} + +define i32 @add_zext_ifneg(i32 %x) { +; CHECK-LABEL: add_zext_ifneg: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #41 // =41 +; CHECK-NEXT: ret + %c = icmp slt i32 %x, 0 + %e = zext i1 %c to i32 + %r = add i32 %e, 41 + ret i32 %r +} + +define i32 @sel_ifneg_tval_bigger(i32 %x) { +; CHECK-LABEL: sel_ifneg_tval_bigger: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: mov w8, #41 +; CHECK-NEXT: cinc w0, w8, lt +; CHECK-NEXT: ret + %c = icmp slt i32 %x, 0 + %r = select i1 %c, i32 42, i32 41 + ret i32 %r +} + +define i32 @sext_ifneg(i32 %x) { +; CHECK-LABEL: sext_ifneg: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w0, w0, #31 +; CHECK-NEXT: ret + %c = icmp slt i32 %x, 0 + %r = sext i1 %c to i32 + ret i32 %r +} + +define i32 @add_sext_ifneg(i32 %x) { +; CHECK-LABEL: add_sext_ifneg: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: sub w0, w8, w0, lsr #31 +; CHECK-NEXT: ret + %c = icmp slt i32 %x, 0 + %e = sext i1 %c to i32 + %r = add i32 %e, 42 + ret i32 %r +} + +define i32 @sel_ifneg_fval_bigger(i32 %x) { +; CHECK-LABEL: sel_ifneg_fval_bigger: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: mov w8, #41 +; CHECK-NEXT: cinc w0, w8, ge +; 
CHECK-NEXT: ret + %c = icmp slt i32 %x, 0 + %r = select i1 %c, i32 41, i32 42 + ret i32 %r +} + +define i32 @add_lshr_not(i32 %x) { +; CHECK-LABEL: add_lshr_not: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #42 // =42 +; CHECK-NEXT: ret + %not = xor i32 %x, -1 + %sh = lshr i32 %not, 31 + %r = add i32 %sh, 41 + ret i32 %r +} + +define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { +; CHECK-LABEL: add_lshr_not_vec_splat: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #43 +; CHECK-NEXT: ssra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> + %r = add <4 x i32> %e, <i32 42, i32 42, i32 42, i32 42> + ret <4 x i32> %r +} + +define i32 @sub_lshr_not(i32 %x) { +; CHECK-LABEL: sub_lshr_not: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: bfxil w8, w0, #31, #1 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %not = xor i32 %x, -1 + %sh = lshr i32 %not, 31 + %r = sub i32 43, %sh + ret i32 %r +} + +define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { +; CHECK-LABEL: sub_lshr_not_vec_splat: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #41 +; CHECK-NEXT: usra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> + %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %e + ret <4 x i32> %r +} + +define i32 @sub_lshr(i32 %x, i32 %y) { +; CHECK-LABEL: sub_lshr: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w0, w1, w0, lsr #31 +; CHECK-NEXT: ret + %sh = lshr i32 %x, 31 + %r = sub i32 %y, %sh + ret i32 %r +} + +define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: sub_lshr_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: ushr v0.4s, v0.4s, #31 +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 
31> + %r = sub <4 x i32> %y, %sh + ret <4 x i32> %r +} + +define i32 @sub_const_op_lshr(i32 %x) { +; CHECK-LABEL: sub_const_op_lshr: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43 +; CHECK-NEXT: sub w0, w8, w0, lsr #31 +; CHECK-NEXT: ret + %sh = lshr i32 %x, 31 + %r = sub i32 43, %sh + ret i32 %r +} + +define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) { +; CHECK-LABEL: sub_const_op_lshr_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: ushr v0.4s, v0.4s, #31 +; CHECK-NEXT: movi v1.4s, #42 +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> + %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh + ret <4 x i32> %r +} + diff --git a/test/CodeGen/AArch64/signed-truncation-check.ll b/test/CodeGen/AArch64/signed-truncation-check.ll new file mode 100644 index 000000000000..6b5fffefe367 --- /dev/null +++ b/test/CodeGen/AArch64/signed-truncation-check.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=38149 + +; We are truncating from wider width, and then sign-extending +; back to the original width. Then we equality-comparing orig and src. +; If they don't match, then we had signed truncation during truncation. + +; This can be expressed in a several ways in IR: +; trunc + sext + icmp eq <- not canonical +; shl + ashr + icmp eq +; add + icmp uge +; add + icmp ult/ule +; However only the simplest form (with two shifts) gets lowered best. 
+ +; ---------------------------------------------------------------------------- ; +; shl + ashr + icmp eq +; ---------------------------------------------------------------------------- ; + +define i1 @shifts_eqcmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: shifts_eqcmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = shl i16 %x, 8 ; 16-8 + %tmp1 = ashr exact i16 %tmp0, 8 ; 16-8 + %tmp2 = icmp eq i16 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_eqcmp_i32_i16(i32 %x) nounwind { +; CHECK-LABEL: shifts_eqcmp_i32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = shl i32 %x, 16 ; 32-16 + %tmp1 = ashr exact i32 %tmp0, 16 ; 32-16 + %tmp2 = icmp eq i32 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_eqcmp_i32_i8(i32 %x) nounwind { +; CHECK-LABEL: shifts_eqcmp_i32_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = shl i32 %x, 24 ; 32-8 + %tmp1 = ashr exact i32 %tmp0, 24 ; 32-8 + %tmp2 = icmp eq i32 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_eqcmp_i64_i32(i64 %x) nounwind { +; CHECK-LABEL: shifts_eqcmp_i64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = shl i64 %x, 32 ; 64-32 + %tmp1 = ashr exact i64 %tmp0, 32 ; 64-32 + %tmp2 = icmp eq i64 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_eqcmp_i64_i16(i64 %x) nounwind { +; CHECK-LABEL: shifts_eqcmp_i64_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = shl i64 %x, 48 ; 64-16 + %tmp1 = ashr exact i64 %tmp0, 48 ; 64-16 + %tmp2 = icmp eq i64 %tmp1, %x + ret i1 %tmp2 +} + +define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind { +; CHECK-LABEL: shifts_eqcmp_i64_i8: +; 
CHECK: // %bb.0: +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = shl i64 %x, 56 ; 64-8 + %tmp1 = ashr exact i64 %tmp0, 56 ; 64-8 + %tmp2 = icmp eq i64 %tmp1, %x + ret i1 %tmp2 +} + +; ---------------------------------------------------------------------------- ; +; add + icmp uge +; ---------------------------------------------------------------------------- ; + +define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #128 // =128 +; CHECK-NEXT: ubfx w8, w8, #8, #8 +; CHECK-NEXT: cmp w8, #254 // =254 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i16 %x, -128 ; ~0U << (8-1) + %tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #8, lsl #12 // =32768 +; CHECK-NEXT: orr w9, wzr, #0xfffeffff +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i32 %x, -32768 ; ~0U << (16-1) + %tmp1 = icmp uge i32 %tmp0, -65536 ; ~0U << 16 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i32_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #128 // =128 +; CHECK-NEXT: cmn w8, #257 // =257 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i32 %x, -128 ; ~0U << (8-1) + %tmp1 = icmp uge i32 %tmp0, -256 ; ~0U << 8 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-2147483648 +; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: orr x9, xzr, #0xfffffffeffffffff +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1) + %tmp1 = icmp uge i64 %tmp0, -4294967296 ; ~0U << 32 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { +; CHECK-LABEL: 
add_ugecmp_i64_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #8, lsl #12 // =32768 +; CHECK-NEXT: orr x9, xzr, #0xfffffffffffeffff +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i64 %x, -32768 ; ~0U << (16-1) + %tmp1 = icmp uge i64 %tmp0, -65536 ; ~0U << 16 + ret i1 %tmp1 +} + +define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { +; CHECK-LABEL: add_ugecmp_i64_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #128 // =128 +; CHECK-NEXT: cmn x8, #257 // =257 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %tmp0 = add i64 %x, -128 ; ~0U << (8-1) + %tmp1 = icmp uge i64 %tmp0, -256 ; ~0U << 8 + ret i1 %tmp1 +} + +; ---------------------------------------------------------------------------- ; +; add + icmp ult +; ---------------------------------------------------------------------------- ; + +define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i32_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = add i32 %x, 32768 ; 1U << (16-1) + %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i32_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = add i32 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq +; 
CHECK-NEXT: ret + %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) + %tmp1 = icmp ult i64 %tmp0, 4294967296 ; 1U << 32 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i64_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = add i64 %x, 32768 ; 1U << (16-1) + %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16 + ret i1 %tmp1 +} + +define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { +; CHECK-LABEL: add_ultcmp_i64_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = add i64 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Slightly more canonical variant +define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ulecmp_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ule i16 %tmp0, 255 ; (1U << 8) - 1 + ret i1 %tmp1 +} + +; Negative tests +; ---------------------------------------------------------------------------- ; + +; Adding not a constant +define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i16_i8_add: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #256 // =256 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, %y + %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Comparing not with a constant +define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i16_i8_cmp: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, w1, uxth +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 
= icmp ult i16 %tmp0, %y + ret i1 %tmp1 +} + +; Second constant is not larger than the first one +define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i8_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: add w8, w8, #128 // =128 +; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1) + ret i1 %tmp1 +} + +; First constant is not power of two +define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #192 // =192 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #256 // =256 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1)) + %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Second constant is not power of two +define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #768 // =768 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ult i16 %tmp0, 768 ; (1U << 8)) + (1U << (8+1)) + ret i1 %tmp1 +} + +; Magic check fails, 64 << 1 != 256 +define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #64 // =64 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #256 // =256 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 64 ; 1U << (8-1-1) + %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +; Bad 'destination type' +define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i16_i4: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #8 // =8 +; CHECK-NEXT: and w8, w8, #0xffff 
+; CHECK-NEXT: cmp w8, #16 // =16 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 8 ; 1U << (4-1) + %tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4 + ret i1 %tmp1 +} + +; Bad storage type +define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind { +; CHECK-LABEL: add_ultcmp_bad_i24_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: and w8, w8, #0xffffff +; CHECK-NEXT: cmp w8, #256 // =256 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %tmp0 = add i24 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8 + ret i1 %tmp1 +} + +define i1 @add_ulecmp_bad_i16_i8(i16 %x) nounwind { +; CHECK-LABEL: add_ulecmp_bad_i16_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w0, wzr, #0x1 +; CHECK-NEXT: ret + %tmp0 = add i16 %x, 128 ; 1U << (8-1) + %tmp1 = icmp ule i16 %tmp0, -1 ; when we +1 it, it will wrap to 0 + ret i1 %tmp1 +} diff --git a/test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll b/test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll new file mode 100644 index 000000000000..7c4a3238c2c1 --- /dev/null +++ b/test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; CHECK-LABEL: %bb.0: +; CHECK-NOT: stp +; CHECK-NOT: mov w{{[0-9]+}}, w0 +; CHECK-LABEL: %bb.1: +; CHECK: stp x19 +; CHECK: mov w{{[0-9]+}}, w0 + +define i32 @shrinkwrapme(i32 %paramAcrossCall, i32 %paramNotAcrossCall) { +entry: + %cmp5 = icmp sgt i32 %paramNotAcrossCall, 0 + br i1 %cmp5, label %CallBB, label %Exit +CallBB: + %call = call i32 @fun() + %add = add i32 %call, %paramAcrossCall + ret i32 %add +Exit: + ret i32 0 +} + +declare i32 @fun() diff --git a/test/CodeGen/AArch64/spill-fold.mir b/test/CodeGen/AArch64/spill-fold.mir new file mode 100644 index 000000000000..6cfd48529f9f --- /dev/null +++ b/test/CodeGen/AArch64/spill-fold.mir @@ -0,0 +1,82 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s +--- | + define i64 
@test_subreg_spill_fold() { ret i64 0 } + define i64 @test_subreg_spill_fold2() { ret i64 0 } + define i64 @test_subreg_spill_fold3() { ret i64 0 } + define i64 @test_subreg_fill_fold() { ret i64 0 } + define double @test_subreg_fill_fold2() { ret double 0.0 } +... +--- +# CHECK-LABEL: name: test_subreg_spill_fold +# Ensure that the spilled subreg COPY is eliminated and folded into the spill store. +name: test_subreg_spill_fold +registers: + - { id: 0, class: gpr64 } +body: | + bb.0: + ; CHECK: STRXui $xzr, %stack.0, 0 :: (store 8 into %stack.0) + undef %0.sub_32 = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp + $x0 = COPY %0 + RET_ReallyLR implicit $x0 +... +--- +# CHECK-LABEL: name: test_subreg_spill_fold2 +# Similar to test_subreg_spill_fold, but with a %0 register class not containing %WZR. 
+name: test_subreg_spill_fold2 +registers: + - { id: 0, class: gpr64sp } +body: | + bb.0: + ; CHECK: STRXui $xzr, %stack.0, 0 :: (store 8 into %stack.0) + undef %0.sub_32 = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp + $x0 = ADDXri %0, 1, 0 + RET_ReallyLR implicit $x0 +... +--- +# CHECK-LABEL: name: test_subreg_spill_fold3 +# Similar to test_subreg_spill_fold, but with a cross register class copy. 
+name: test_subreg_spill_fold3 +registers: + - { id: 0, class: fpr64 } +body: | + bb.0: + ; CHECK: STRXui $xzr, %stack.0, 0 :: (store 8 into %stack.0) + undef %0.ssub = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31 + $x0 = COPY %0 + RET_ReallyLR implicit $x0 +... +--- +# CHECK-LABEL: name: test_subreg_fill_fold +# Ensure that the filled COPY is eliminated and folded into the fill load. 
+name: test_subreg_fill_fold +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr64 } +body: | + bb.0: + %0 = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp + ; CHECK: undef %1.sub_32:gpr64 = LDRWui %stack.0, 0 :: (load 4 from %stack.0) + undef %1.sub_32 = COPY %0 + $x0 = COPY %1 + RET_ReallyLR implicit $x0 +... +--- +# CHECK-LABEL: name: test_subreg_fill_fold2 +# Similar to test_subreg_fill_fold, but with a cross-class copy. 
+name: test_subreg_fill_fold2 +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: fpr64 } +body: | + bb.0: + %0 = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp + ; CHECK: undef %1.ssub:fpr64 = LDRSui %stack.0, 0 :: (load 4 from %stack.0) + undef %1.ssub = COPY %0 + $d0 = COPY %1 + RET_ReallyLR implicit $d0 +... diff --git a/test/CodeGen/AArch64/spill-stack-realignment.mir b/test/CodeGen/AArch64/spill-stack-realignment.mir new file mode 100644 index 000000000000..a6837bc3d4b5 --- /dev/null +++ b/test/CodeGen/AArch64/spill-stack-realignment.mir @@ -0,0 +1,35 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s + +# Ensure references to scavenged stack slots in the CSR area use the +# FP as a base when the stack pointer must be aligned to something +# larger than required by the target. This is necessary because the +# alignment padding area is between the CSR area and the SP, so the SP +# cannot be used to reference the CSR area. 
+name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 64 +# CHECK: stack: +# CHECK: id: 0, name: '', type: default, offset: -64, size: 4, alignment: 64 +# CHECK-NEXT: stack-id: 0 +# CHECK-NEXT: local-offset: -64 +# CHECK: id: 1, name: '', type: default, offset: -20, size: 4, alignment: 4 +# CHECK-NEXT: stack-id: 0 +# CHECK-NEXT: local-offset: -68 +stack: + - { id: 0, size: 4, alignment: 64, local-offset: -64 } + - { id: 1, size: 4, alignment: 4, local-offset: -68 } + +# CHECK: body: +# CHECK: $sp = ANDXri killed ${{x[0-9]+}}, 7865 +# CHECK: STRSui $s0, $sp, 0 +# CHECK: STURSi $s0, $fp, -4 +body: | + bb.0.entry: + liveins: $s0 + + STRSui $s0, %stack.0, 0 + STRSui $s0, %stack.1, 0 + ; Force preserve a CSR to create a hole in the CSR stack region. + $x28 = IMPLICIT_DEF + RET_ReallyLR diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir index c4f589b5cc49..9fb0c44ac0d5 100644 --- a/test/CodeGen/AArch64/spill-undef.mir +++ b/test/CodeGen/AArch64/spill-undef.mir @@ -30,7 +30,7 @@ registers: - { id: 9, class: gpr64 } body: | bb.0: - liveins: %x0 + liveins: $x0 successors: %bb.1, %bb.2 ; %8 is going to be spilled. @@ -43,25 +43,25 @@ body: | ; %9 us going to be spilled. ; But it is only partially undef. 
; Make sure we spill it properly - ; CHECK: [[NINE:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[NINE:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[NINE]].sub_32:gpr64 = IMPLICIT_DEF ; CHECK-NEXT: STRXui [[NINE]] - %9 = COPY %x0 + %9 = COPY $x0 %9.sub_32 = IMPLICIT_DEF - CBNZW %wzr, %bb.2 + CBNZW $wzr, %bb.2 B %bb.1 bb.1: %4 = ADRP target-flags(aarch64-page) @g %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g) - INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + INLINEASM &nop, 1, 12, implicit-def dead early-clobber $x0, 12, implicit-def dead early-clobber $x1, 12, implicit-def dead early-clobber $x2, 12, implicit-def dead early-clobber $x3, 12, implicit-def dead early-clobber $x4, 12, 
implicit-def dead early-clobber $x5, 12, implicit-def dead early-clobber $x6, 12, implicit-def dead early-clobber $x7, 12, implicit-def dead early-clobber $x8, 12, implicit-def dead early-clobber $x9, 12, implicit-def dead early-clobber $x10, 12, implicit-def dead early-clobber $x11, 12, implicit-def dead early-clobber $x12, 12, implicit-def dead early-clobber $x13, 12, implicit-def dead early-clobber $x14, 12, implicit-def dead early-clobber $x15, 12, implicit-def dead early-clobber $x16, 12, implicit-def dead early-clobber $x17, 12, implicit-def dead early-clobber $x18, 12, implicit-def dead early-clobber $x19, 12, implicit-def dead early-clobber $x20, 12, implicit-def dead early-clobber $x21, 12, implicit-def dead early-clobber $x22, 12, implicit-def dead early-clobber $x23, 12, implicit-def dead early-clobber $x24, 12, implicit-def dead early-clobber $x25, 12, implicit-def dead early-clobber $x26, 12, implicit-def dead early-clobber $x27, 12, implicit-def dead early-clobber $x28, 12, implicit-def dead early-clobber $fp, 12, implicit-def dead early-clobber $lr bb.2: - INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def 
dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + INLINEASM &nop, 1, 12, implicit-def dead early-clobber $x0, 12, implicit-def dead early-clobber $x1, 12, implicit-def dead early-clobber $x2, 12, implicit-def dead early-clobber $x3, 12, implicit-def dead early-clobber $x4, 12, implicit-def dead early-clobber $x5, 12, implicit-def dead early-clobber $x6, 12, implicit-def dead early-clobber $x7, 12, implicit-def dead early-clobber $x8, 12, implicit-def dead early-clobber $x9, 12, implicit-def dead early-clobber $x10, 12, implicit-def dead early-clobber $x11, 12, implicit-def dead early-clobber $x12, 12, implicit-def dead early-clobber $x13, 12, implicit-def dead early-clobber $x14, 12, implicit-def dead early-clobber $x15, 12, implicit-def dead early-clobber $x16, 12, implicit-def dead early-clobber $x17, 12, implicit-def dead early-clobber $x18, 12, implicit-def dead early-clobber $x19, 12, implicit-def dead early-clobber $x20, 12, implicit-def dead early-clobber $x21, 12, implicit-def dead early-clobber $x22, 12, implicit-def dead early-clobber $x23, 12, implicit-def dead early-clobber $x24, 12, implicit-def dead early-clobber $x25, 12, implicit-def dead early-clobber $x26, 12, implicit-def dead early-clobber $x27, 12, implicit-def dead early-clobber $x28, 12, implicit-def dead early-clobber $fp, 12, implicit-def dead early-clobber $lr %6 = ADRP target-flags(aarch64-page) @g - %w0 = MOVi32imm 42 + $w0 = MOVi32imm 42 STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g) STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 8 into @g) - RET_ReallyLR implicit 
killed %w0 + RET_ReallyLR implicit killed $w0 ... diff --git a/test/CodeGen/AArch64/sqrt-fastmath.ll b/test/CodeGen/AArch64/sqrt-fastmath.ll index ade9e3d8df32..a559b7868575 100644 --- a/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s @@ -10,289 +11,447 @@ declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0 define float @fsqrt(float %a) #0 { +; FAULT-LABEL: fsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt s0, s0 +; FAULT-NEXT: ret +; +; CHECK-LABEL: fsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte s1, s0 +; CHECK-NEXT: fmul s2, s1, s1 +; CHECK-NEXT: frsqrts s2, s0, s2 +; CHECK-NEXT: fmul s1, s1, s2 +; CHECK-NEXT: fmul s2, s1, s1 +; CHECK-NEXT: frsqrts s2, s0, s2 +; CHECK-NEXT: fmul s2, s2, s0 +; CHECK-NEXT: fmul s1, s1, s2 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s0, s1, eq +; CHECK-NEXT: ret %1 = tail call fast float @llvm.sqrt.f32(float %a) ret float %1 +} -; FAULT-LABEL: fsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: fsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:s[0-7]]] -; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]] -; CHECK: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} -; CHECK-NOT: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} -; CHECK: fcmp {{s[0-7]}}, #0 +define float @fsqrt_ieee_denorms(float %a) #1 { +; FAULT-LABEL: fsqrt_ieee_denorms: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt s0, s0 +; FAULT-NEXT: ret +; +; CHECK-LABEL: fsqrt_ieee_denorms: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte s1, s0 +; CHECK-NEXT: fmul s2, s1, s1 +; CHECK-NEXT: frsqrts s2, s0, 
s2 +; CHECK-NEXT: fmul s1, s1, s2 +; CHECK-NEXT: fmul s2, s1, s1 +; CHECK-NEXT: frsqrts s2, s0, s2 +; CHECK-NEXT: fmul s2, s2, s0 +; CHECK-NEXT: fmul s1, s1, s2 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s0, s1, eq +; CHECK-NEXT: ret + %1 = tail call fast float @llvm.sqrt.f32(float %a) + ret float %1 } define <2 x float> @f2sqrt(<2 x float> %a) #0 { - %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) - ret <2 x float> %1 - ; FAULT-LABEL: f2sqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.2s, v0.2s +; FAULT-NEXT: ret +; ; CHECK-LABEL: f2sqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} -; CHECK-NOT: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} -; CHECK: fcmeq {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, #0 +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v1.2s, v0.2s +; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s +; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s +; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s +; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s +; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s +; CHECK-NEXT: fmul v2.2s, v2.2s, v0.2s +; CHECK-NEXT: fmul v2.2s, v1.2s, v2.2s +; CHECK-NEXT: fcmeq v1.2s, v0.2s, #0.0 +; CHECK-NEXT: bsl v1.8b, v0.8b, v2.8b +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) + ret <2 x float> %1 } define <4 x float> @f4sqrt(<4 x float> %a) #0 { - %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) - ret <4 x float> %1 - ; FAULT-LABEL: f4sqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.4s, v0.4s +; FAULT-NEXT: ret +; ; CHECK-LABEL: f4sqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts 
{{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0 +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v1.4s, v0.4s +; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s +; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s +; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s +; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s +; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s +; CHECK-NEXT: fmul v2.4s, v2.4s, v0.4s +; CHECK-NEXT: fmul v2.4s, v1.4s, v2.4s +; CHECK-NEXT: fcmeq v1.4s, v0.4s, #0.0 +; CHECK-NEXT: bsl v1.16b, v0.16b, v2.16b +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %1 } define <8 x float> @f8sqrt(<8 x float> %a) #0 { - %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a) - ret <8 x float> %1 - ; FAULT-LABEL: f8sqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt -; FAULT-NEXT: fsqrt - +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.4s, v0.4s +; FAULT-NEXT: fsqrt v1.4s, v1.4s +; FAULT-NEXT: ret +; ; CHECK-LABEL: f8sqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0 -; CHECK: frsqrte [[RC:v[0-7]\.4s]] -; CHECK-NEXT: fmul [[RD:v[0-7]\.4s]], [[RC]], [[RC]] -; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RD]] -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0 +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v2.4s, v0.4s +; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s +; CHECK-NEXT: frsqrts v3.4s, v0.4s, v3.4s +; CHECK-NEXT: fmul v2.4s, v2.4s, v3.4s +; CHECK-NEXT: fmul v3.4s, v2.4s, v2.4s +; 
CHECK-NEXT: frsqrts v3.4s, v0.4s, v3.4s +; CHECK-NEXT: fmul v3.4s, v3.4s, v0.4s +; CHECK-NEXT: fmul v3.4s, v2.4s, v3.4s +; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0 +; CHECK-NEXT: bsl v2.16b, v0.16b, v3.16b +; CHECK-NEXT: frsqrte v0.4s, v1.4s +; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s +; CHECK-NEXT: frsqrts v3.4s, v1.4s, v3.4s +; CHECK-NEXT: fmul v0.4s, v0.4s, v3.4s +; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s +; CHECK-NEXT: frsqrts v3.4s, v1.4s, v3.4s +; CHECK-NEXT: fmul v3.4s, v3.4s, v1.4s +; CHECK-NEXT: fmul v0.4s, v0.4s, v3.4s +; CHECK-NEXT: fcmeq v3.4s, v1.4s, #0.0 +; CHECK-NEXT: bsl v3.16b, v1.16b, v0.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ret + %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a) + ret <8 x float> %1 } define double @dsqrt(double %a) #0 { +; FAULT-LABEL: dsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt d0, d0 +; FAULT-NEXT: ret +; +; CHECK-LABEL: dsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte d1, d0 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d2, d2, d0 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: fcsel d0, d0, d1, eq +; CHECK-NEXT: ret %1 = tail call fast double @llvm.sqrt.f64(double %a) ret double %1 +} -; FAULT-LABEL: dsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: dsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:d[0-7]]] -; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]] -; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} -; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} -; CHECK-NOT: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} -; CHECK: fcmp {{d[0-7]}}, #0 +define double @dsqrt_ieee_denorms(double %a) #1 { +; FAULT-LABEL: dsqrt_ieee_denorms: +; FAULT: 
// %bb.0: +; FAULT-NEXT: fsqrt d0, d0 +; FAULT-NEXT: ret +; +; CHECK-LABEL: dsqrt_ieee_denorms: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte d1, d0 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d2, d2, d0 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: fcsel d0, d0, d1, eq +; CHECK-NEXT: ret + %1 = tail call fast double @llvm.sqrt.f64(double %a) + ret double %1 } define <2 x double> @d2sqrt(<2 x double> %a) #0 { - %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) - ret <2 x double> %1 - ; FAULT-LABEL: d2sqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.2d, v0.2d +; FAULT-NEXT: ret +; ; CHECK-LABEL: d2sqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0 +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v1.2d, v0.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d +; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d +; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d +; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d +; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d +; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d +; CHECK-NEXT: fmul v2.2d, v2.2d, v0.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v2.2d +; CHECK-NEXT: fcmeq v1.2d, v0.2d, #0.0 +; CHECK-NEXT: bsl v1.16b, v0.16b, v2.16b +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = tail call fast <2 x double> 
@llvm.sqrt.v2f64(<2 x double> %a) + ret <2 x double> %1 } define <4 x double> @d4sqrt(<4 x double> %a) #0 { - %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) - ret <4 x double> %1 - ; FAULT-LABEL: d4sqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt -; FAULT-NEXT: fsqrt - +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.2d, v0.2d +; FAULT-NEXT: fsqrt v1.2d, v1.2d +; FAULT-NEXT: ret +; ; CHECK-LABEL: d4sqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0 -; CHECK: frsqrte [[RC:v[0-7]\.2d]] -; CHECK-NEXT: fmul [[RD:v[0-7]\.2d]], [[RC]], [[RC]] -; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RD]] -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0 +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v2.2d, v0.2d +; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d +; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d +; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d +; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d +; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d +; CHECK-NEXT: fmul v2.2d, v2.2d, v3.2d +; CHECK-NEXT: fmul v3.2d, v2.2d, v2.2d +; CHECK-NEXT: frsqrts v3.2d, v0.2d, v3.2d +; CHECK-NEXT: fmul v3.2d, v3.2d, v0.2d +; CHECK-NEXT: fmul v3.2d, v2.2d, v3.2d +; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0 +; CHECK-NEXT: bsl v2.16b, v0.16b, v3.16b +; CHECK-NEXT: frsqrte v0.2d, v1.2d +; CHECK-NEXT: fmul v3.2d, v0.2d, v0.2d +; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d +; CHECK-NEXT: fmul v0.2d, v0.2d, v3.2d +; CHECK-NEXT: fmul v3.2d, v0.2d, v0.2d +; 
CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d +; CHECK-NEXT: fmul v0.2d, v0.2d, v3.2d +; CHECK-NEXT: fmul v3.2d, v0.2d, v0.2d +; CHECK-NEXT: frsqrts v3.2d, v1.2d, v3.2d +; CHECK-NEXT: fmul v3.2d, v3.2d, v1.2d +; CHECK-NEXT: fmul v0.2d, v0.2d, v3.2d +; CHECK-NEXT: fcmeq v3.2d, v1.2d, #0.0 +; CHECK-NEXT: bsl v3.16b, v1.16b, v0.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ret + %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) + ret <4 x double> %1 } define float @frsqrt(float %a) #0 { +; FAULT-LABEL: frsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt s0, s0 +; FAULT-NEXT: fmov s1, #1.00000000 +; FAULT-NEXT: fdiv s0, s1, s0 +; FAULT-NEXT: ret +; +; CHECK-LABEL: frsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte s1, s0 +; CHECK-NEXT: fmul s2, s1, s1 +; CHECK-NEXT: frsqrts s2, s0, s2 +; CHECK-NEXT: fmul s1, s1, s2 +; CHECK-NEXT: fmul s2, s1, s1 +; CHECK-NEXT: frsqrts s0, s0, s2 +; CHECK-NEXT: fmul s0, s1, s0 +; CHECK-NEXT: ret %1 = tail call fast float @llvm.sqrt.f32(float %a) %2 = fdiv fast float 1.000000e+00, %1 ret float %2 - -; FAULT-LABEL: frsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: frsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:s[0-7]]] -; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]] -; CHECK: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} -; CHECK-NOT: frsqrts {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} -; CHECK-NOT: fcmp {{s[0-7]}}, #0 } define <2 x float> @f2rsqrt(<2 x float> %a) #0 { +; FAULT-LABEL: f2rsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.2s, v0.2s +; FAULT-NEXT: fmov v1.2s, #1.00000000 +; FAULT-NEXT: fdiv v0.2s, v1.2s, v0.2s +; FAULT-NEXT: ret +; +; CHECK-LABEL: f2rsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v1.2s, v0.2s +; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s +; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s +; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s +; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s +; CHECK-NEXT: frsqrts v0.2s, v0.2s, 
v2.2s +; CHECK-NEXT: fmul v0.2s, v1.2s, v0.2s +; CHECK-NEXT: ret %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1 ret <2 x float> %2 - -; FAULT-LABEL: f2rsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f2rsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} -; CHECK-NOT: frsqrts {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} -; CHECK-NOT: fcmeq {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, #0 } define <4 x float> @f4rsqrt(<4 x float> %a) #0 { +; FAULT-LABEL: f4rsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.4s, v0.4s +; FAULT-NEXT: fmov v1.4s, #1.00000000 +; FAULT-NEXT: fdiv v0.4s, v1.4s, v0.4s +; FAULT-NEXT: ret +; +; CHECK-LABEL: f4rsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v1.4s, v0.4s +; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s +; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s +; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s +; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s +; CHECK-NEXT: frsqrts v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1 ret <4 x float> %2 - -; FAULT-LABEL: f4rsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f4rsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK-NOT: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0 } define <8 x float> @f8rsqrt(<8 x float> %a) #0 { +; FAULT-LABEL: f8rsqrt: +; 
FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v1.4s, v1.4s +; FAULT-NEXT: fsqrt v0.4s, v0.4s +; FAULT-NEXT: fmov v2.4s, #1.00000000 +; FAULT-NEXT: fdiv v0.4s, v2.4s, v0.4s +; FAULT-NEXT: fdiv v1.4s, v2.4s, v1.4s +; FAULT-NEXT: ret +; +; CHECK-LABEL: f8rsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v2.4s, v0.4s +; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s +; CHECK-NEXT: frsqrte v3.4s, v1.4s +; CHECK-NEXT: frsqrts v4.4s, v0.4s, v4.4s +; CHECK-NEXT: fmul v2.4s, v2.4s, v4.4s +; CHECK-NEXT: fmul v4.4s, v3.4s, v3.4s +; CHECK-NEXT: frsqrts v4.4s, v1.4s, v4.4s +; CHECK-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s +; CHECK-NEXT: frsqrts v0.4s, v0.4s, v4.4s +; CHECK-NEXT: fmul v4.4s, v3.4s, v3.4s +; CHECK-NEXT: frsqrts v1.4s, v1.4s, v4.4s +; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s +; CHECK-NEXT: ret %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a) %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1 ret <8 x float> %2 - -; FAULT-LABEL: f8rsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: f8rsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] -; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] -; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK-NOT: frsqrts {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} -; CHECK-NOT: fcmeq {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, #0 } define double @drsqrt(double %a) #0 { +; FAULT-LABEL: drsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt d0, d0 +; FAULT-NEXT: fmov d1, #1.00000000 +; FAULT-NEXT: fdiv d0, d1, d0 +; FAULT-NEXT: ret +; +; CHECK-LABEL: drsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: 
frsqrte d1, d0 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d0, d0, d2 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: ret %1 = tail call fast double @llvm.sqrt.f64(double %a) %2 = fdiv fast double 1.000000e+00, %1 ret double %2 - -; FAULT-LABEL: drsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: drsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:d[0-7]]] -; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]] -; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} -; CHECK: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} -; CHECK-NOT: frsqrts {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} -; CHECK-NOT: fcmp d0, #0 } define <2 x double> @d2rsqrt(<2 x double> %a) #0 { +; FAULT-LABEL: d2rsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v0.2d, v0.2d +; FAULT-NEXT: fmov v1.2d, #1.00000000 +; FAULT-NEXT: fdiv v0.2d, v1.2d, v0.2d +; FAULT-NEXT: ret +; +; CHECK-LABEL: d2rsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v1.2d, v0.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d +; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d +; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d +; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d +; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d +; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d +; CHECK-NEXT: frsqrts v0.2d, v0.2d, v2.2d +; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d +; CHECK-NEXT: ret %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1 ret <2 x double> %2 - -; FAULT-LABEL: d2rsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: d2rsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] -; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] -; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, 
v[0-7]\.2d)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0 } define <4 x double> @d4rsqrt(<4 x double> %a) #0 { +; FAULT-LABEL: d4rsqrt: +; FAULT: // %bb.0: +; FAULT-NEXT: fsqrt v1.2d, v1.2d +; FAULT-NEXT: fsqrt v0.2d, v0.2d +; FAULT-NEXT: fmov v2.2d, #1.00000000 +; FAULT-NEXT: fdiv v0.2d, v2.2d, v0.2d +; FAULT-NEXT: fdiv v1.2d, v2.2d, v1.2d +; FAULT-NEXT: ret +; +; CHECK-LABEL: d4rsqrt: +; CHECK: // %bb.0: +; CHECK-NEXT: frsqrte v2.2d, v0.2d +; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d +; CHECK-NEXT: frsqrte v3.2d, v1.2d +; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d +; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d +; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d +; CHECK-NEXT: frsqrts v4.2d, v1.2d, v4.2d +; CHECK-NEXT: fmul v3.2d, v3.2d, v4.2d +; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d +; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d +; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d +; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d +; CHECK-NEXT: frsqrts v4.2d, v1.2d, v4.2d +; CHECK-NEXT: fmul v3.2d, v3.2d, v4.2d +; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d +; CHECK-NEXT: frsqrts v0.2d, v0.2d, v4.2d +; CHECK-NEXT: fmul v4.2d, v3.2d, v3.2d +; CHECK-NEXT: frsqrts v1.2d, v1.2d, v4.2d +; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d +; CHECK-NEXT: fmul v1.2d, v3.2d, v1.2d +; CHECK-NEXT: ret %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1 ret <4 x double> %2 - -; FAULT-LABEL: d4rsqrt: -; FAULT-NEXT: %bb.0 -; FAULT-NEXT: fsqrt -; FAULT-NEXT: fsqrt - -; CHECK-LABEL: d4rsqrt: -; CHECK-NEXT: %bb.0 -; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] -; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] -; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, 
{{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: frsqrts {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} -; CHECK-NOT: fcmeq {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, #0 } -attributes #0 = { nounwind "unsafe-fp-math"="true" } +attributes #0 = { "unsafe-fp-math"="true" } +attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" } + diff --git a/test/CodeGen/AArch64/stackguard-internal.ll b/test/CodeGen/AArch64/stackguard-internal.ll new file mode 100644 index 000000000000..6dcdf1619851 --- /dev/null +++ b/test/CodeGen/AArch64/stackguard-internal.ll @@ -0,0 +1,21 @@ +; RUN: llc -O3 %s -o - | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linux-gnu" + +; Make sure we correctly lower stack guards even if __stack_chk_guard +; is an alias. (The alias is created by GlobalMerge.) 
+; CHECK: adrp {{.*}}, __stack_chk_guard +; CHECK: ldr {{.*}}, [{{.*}}, :lo12:__stack_chk_guard] +; CHECK: .set __stack_chk_guard, .L_MergedGlobals+4 + +@__stack_chk_guard = internal global [8 x i32] zeroinitializer, align 4 +@x = internal global i32 0, align 4 + +define i32 @b() nounwind sspstrong { +entry: + %z = alloca [10 x i32], align 4 + %arraydecay = getelementptr inbounds [10 x i32], [10 x i32]* %z, i64 0, i64 0 + %call = call i32 @a(i32* getelementptr inbounds ([8 x i32], [8 x i32]* @__stack_chk_guard, i64 0, i64 0), i32* nonnull @x, i32* nonnull %arraydecay) #3 + ret i32 %call +} +declare i32 @a(i32*, i32*, i32*) diff --git a/test/CodeGen/AArch64/strqro.ll b/test/CodeGen/AArch64/strqro.ll index 218248d54f85..3705c4a81abd 100644 --- a/test/CodeGen/AArch64/strqro.ll +++ b/test/CodeGen/AArch64/strqro.ll @@ -1,5 +1,6 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-STRQRO %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=falkor | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTRQRO %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-STRQRO +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=slow-strqro-store | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRQRO +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=falkor | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRQRO ; CHECK-LABEL: strqrox: ; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x diff --git a/test/CodeGen/AArch64/strqu.ll b/test/CodeGen/AArch64/strqu.ll new file mode 100644 index 000000000000..94b9ff3c3bae --- /dev/null +++ b/test/CodeGen/AArch64/strqu.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu | FileCheck --check-prefixes=CHECK,NOSPLIT %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu | FileCheck 
--check-prefixes=CHECK,NOSPLIT %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mcpu=exynos-m1 | FileCheck --check-prefixes=CHECK,NOSPLIT %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mcpu=exynos-m1 | FileCheck --check-prefixes=CHECK,SPLIT %s + +; CHECK-LABEL: test_split_f: +; NOSPLIT: str q{{[0-9]+}}, [x{{[0-9]+}}] +; SPLIT: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+}}] +; SPLIT: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+}}] +define void @test_split_f(<4 x float> %val, <4 x float>* %addr) { + store <4 x float> %val, <4 x float>* %addr, align 8 + ret void +} + +; CHECK-LABEL: test_split_d: +; NOSPLIT: str q{{[0-9]+}}, [x{{[0-9]+}}] +; SPLIT: st1 { v{{[0-9]+}}.2d }, [x{{[0-9]+}}] +define void @test_split_d(<2 x double> %val, <2 x double>* %addr) { + store <2 x double> %val, <2 x double>* %addr, align 8 + ret void +} + +; CHECK-LABEL: test_split_128: +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}] +define void @test_split_128(fp128 %val, fp128* %addr) { + store fp128 %val, fp128* %addr, align 8 + ret void +} diff --git a/test/CodeGen/AArch64/sub1.ll b/test/CodeGen/AArch64/sub1.ll new file mode 100644 index 000000000000..f882adfe178a --- /dev/null +++ b/test/CodeGen/AArch64/sub1.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i64 @sub1_disguised_constant(i64 %x) { +; CHECK-LABEL: sub1_disguised_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, #1 // =1 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ret + %a1 = and i64 %x, 65535 + %a2 = add i64 %x, 65535 + %r = and i64 %a1, %a2 + ret i64 %r +} + diff --git a/test/CodeGen/AArch64/swift-return.ll b/test/CodeGen/AArch64/swift-return.ll index 15c19ce36196..b909482dc0bf 100644 --- a/test/CodeGen/AArch64/swift-return.ll +++ b/test/CodeGen/AArch64/swift-return.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios 
-o - %s | FileCheck %s -; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 ; CHECK-LABEL: test1 ; CHECK: bl _gen diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll index bcad19e391d0..637ff3e2e29b 100644 --- a/test/CodeGen/AArch64/swifterror.ll +++ b/test/CodeGen/AArch64/swifterror.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -disable-fp-elim -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-APPLE %s -; RUN: llc -verify-machineinstrs -disable-fp-elim -O0 < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -disable-fp-elim -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE %s +; RUN: llc -fast-isel-sink-local-values -verify-machineinstrs -disable-fp-elim -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 %s declare i8* @malloc(i64) declare void @free(i8*) @@ -40,11 +40,11 @@ define float @caller(i8* %error_ref) { ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: @@ -189,10 +189,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0:[[BB2]]: ; CHECK-O0: ldr x0, [sp, [[SLOT2]]] ; CHECK-O0: fcmp -; CHECK-O0: str 
x0, [sp] +; CHECK-O0: str x0, [sp, [[SLOT3:#[0-9]+]] ; CHECK-O0: b.le [[BB1]] ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp] +; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT3]]] ; CHECK-O0: mov x21, [[ID3]] ; CHECK-O0: ret entry: @@ -263,11 +263,11 @@ define float @caller3(i8* %error_ref) { ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_sret +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -316,12 +316,11 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) { ; First vararg ; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8 ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16] -; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #8 ; Second vararg ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 ; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16 ; Third vararg -; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 ; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE-NOT: x21 @@ -358,11 +357,11 @@ define float @caller4(i8* %error_ref) { ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_vararg +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* diff --git a/test/CodeGen/AArch64/swiftself.ll b/test/CodeGen/AArch64/swiftself.ll index 33a49198430e..f19c852cb9b1 100644 --- a/test/CodeGen/AArch64/swiftself.ll +++ b/test/CodeGen/AArch64/swiftself.ll @@ -1,5 
+1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s -; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s ; Parameter with swiftself should be allocated to x20. diff --git a/test/CodeGen/AArch64/tailcall-fastisel.ll b/test/CodeGen/AArch64/tailcall-fastisel.ll index 3ba639183161..ea173de274ed 100644 --- a/test/CodeGen/AArch64/tailcall-fastisel.ll +++ b/test/CodeGen/AArch64/tailcall-fastisel.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -fast-isel | FileCheck %s ; CHECK: b _foo0 diff --git a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll index b970fb124151..c780d15b58db 100644 --- a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll +++ b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll @@ -4,7 +4,7 @@ ; CHECK: b memcpy define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -12,7 +12,7 @@ entry: ; CHECK: b memmove define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -20,12 +20,12 @@ entry: ; CHECK: b memset define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { entry: - tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + tail call void 
@llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/tailcall-string-rvo.ll b/test/CodeGen/AArch64/tailcall-string-rvo.ll index bdc09235afd9..d9d2180b5ef0 100644 --- a/test/CodeGen/AArch64/tailcall-string-rvo.ll +++ b/test/CodeGen/AArch64/tailcall-string-rvo.ll @@ -32,7 +32,7 @@ bb: %tmp1 = bitcast %class.basic_string.11.42.73* %arg to %union.anon.8.39.70** store %union.anon.8.39.70* %tmp, %union.anon.8.39.70** %tmp1, align 8 %tmp2 = bitcast %union.anon.8.39.70* %tmp to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i1 false) %tmp3 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 1 store i64 13, i64* %tmp3, align 8 %tmp4 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 2, i32 1, i64 5 @@ -42,6 +42,6 @@ bb: } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 attributes #0 = { argmemonly nounwind } diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll index b926594e4504..8b7f9796e611 
100644 --- a/test/CodeGen/AArch64/tailcall_misched_graph.ll +++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll @@ -29,7 +29,7 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*, ; CHECK: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3 ; CHECK: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2 ; CHECK: STRXui %{{.*}}, %fixed-stack.0 -; CHECK: STRXui [[VRB]], %fixed-stack.1 +; CHECK: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1 ; Make sure that there is a dependence edge between fi#-2 and fi#-4. ; Without this edge the scheduler would be free to move the store across the load. diff --git a/test/CodeGen/AArch64/taildup-cfi.ll b/test/CodeGen/AArch64/taildup-cfi.ll new file mode 100644 index 000000000000..5c7cbaad7c15 --- /dev/null +++ b/test/CodeGen/AArch64/taildup-cfi.ll @@ -0,0 +1,94 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX +; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@g = common local_unnamed_addr global i32 0, align 4 +@f = common local_unnamed_addr global i32 0, align 4 +@a = common local_unnamed_addr global i32 0, align 4 +@m = common local_unnamed_addr global i32 0, align 4 +@l = common local_unnamed_addr global i32 0, align 4 +@j = common local_unnamed_addr global i32 0, align 4 +@k = common local_unnamed_addr global i32 0, align 4 +@i = common local_unnamed_addr global i32 0, align 4 +@d = common local_unnamed_addr global i32 0, align 4 +@c = common local_unnamed_addr global i32 0, align 4 +@e = common local_unnamed_addr global i32 0, align 4 +@h = common local_unnamed_addr global i32 0, align 4 + +; Function Attrs: norecurse nounwind uwtable +define void @n(i32 %o, i32* nocapture readonly %b) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* @g, align 4, !tbaa !2 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label 
%entry.if.end_crit_edge, label %if.then + +entry.if.end_crit_edge: ; preds = %entry + %.pre = load i32, i32* @f, align 4, !tbaa !2 + br label %if.end + +if.then: ; preds = %entry + store i32 0, i32* @f, align 4, !tbaa !2 + br label %if.end + +; DARWIN-NOT: Merging into block +; LINUX: Merging into block + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ 0, %if.then ] + %cmp6 = icmp slt i32 %1, %o + br i1 %cmp6, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %if.end + %.pre7 = load i32, i32* @a, align 4, !tbaa !2 + %.pre8 = load i32, i32* @l, align 4, !tbaa !2 + %.pre9 = load i32, i32* @j, align 4, !tbaa !2 + %.pre10 = load i32, i32* @k, align 4, !tbaa !2 + %.pre11 = load i32, i32* @i, align 4, !tbaa !2 + br label %for.body + +for.body: ; preds = %if.end5, %for.body.lr.ph + %2 = phi i32 [ %.pre11, %for.body.lr.ph ], [ %7, %if.end5 ] + %3 = phi i32 [ %.pre10, %for.body.lr.ph ], [ %8, %if.end5 ] + %4 = phi i32 [ %.pre9, %for.body.lr.ph ], [ %9, %if.end5 ] + %5 = phi i32 [ %1, %for.body.lr.ph ], [ %inc, %if.end5 ] + store i32 %.pre7, i32* @m, align 4, !tbaa !2 + %mul = mul nsw i32 %3, %4 + %cmp1 = icmp sgt i32 %.pre8, %mul + %conv = zext i1 %cmp1 to i32 + %cmp2 = icmp slt i32 %2, %conv + br i1 %cmp2, label %if.then4, label %if.end5 + +if.then4: ; preds = %for.body + %6 = load i32, i32* @d, align 4, !tbaa !2 + store i32 %6, i32* @k, align 4, !tbaa !2 + store i32 %6, i32* @i, align 4, !tbaa !2 + store i32 %6, i32* @j, align 4, !tbaa !2 + br label %if.end5 + +if.end5: ; preds = %if.then4, %for.body + %7 = phi i32 [ %6, %if.then4 ], [ %2, %for.body ] + %8 = phi i32 [ %6, %if.then4 ], [ %3, %for.body ] + %9 = phi i32 [ %6, %if.then4 ], [ %4, %for.body ] + %10 = load i32, i32* @c, align 4, !tbaa !2 + %idxprom = sext i32 %10 to i64 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom + %11 = load i32, i32* %arrayidx, align 4, !tbaa !2 + %12 = load i32, i32* @e, align 4, !tbaa !2 + %sub = 
sub nsw i32 %11, %12 + store i32 %sub, i32* @h, align 4, !tbaa !2 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* @f, align 4, !tbaa !2 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %if.end5, %if.end + ret void +} + +attributes #0 = { norecurse nounwind uwtable } + +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{} diff --git a/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll new file mode 100644 index 000000000000..477fc3793e47 --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [byte3] [byte0] +; Y: [byte2][byte1] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #2 +; CHECK-NEXT: bfi w1, w8, #2, #4 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i8 %x, 60 + %my = and i8 %y, -61 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #4 +; CHECK-NEXT: bfi w1, w8, #4, #8 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i16 %x, 4080 + %my = and i16 %y, -4081 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #8 +; CHECK-NEXT: bfi w1, w8, #8, #16 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i32 %x, 16776960 + %my = and i32 %y, -16776961 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #16 +; CHECK-NEXT: 
bfi x1, x8, #16, #32 +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret + %mx = and i64 %x, 281474976645120 + %my = and i64 %y, -281474976645121 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x3c +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 60 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xff0 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 4080 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, #0xffffffff0000 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 281474976645120 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff00 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff00 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0xffff00 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0xffff00 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #256 +; CHECK-NEXT: movk w9, #65280, lsl #16 +; CHECK-NEXT: and w8, w0, #0xffff00 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 16776960 + %my = and i32 %y, -16776960 ; instead of -16776961 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} diff --git 
a/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll new file mode 100644 index 000000000000..1fc6a0a521c5 --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -0,0 +1,277 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [bit2] [bit0] +; Y: [bit3] [bit1] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #85 +; CHECK-NEXT: mov w9, #-86 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i8 %x, 85 + %my = and i8 %y, -86 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #21845 +; CHECK-NEXT: mov w9, #-21846 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i16 %x, 21845 + %my = and i16 %y, -21846 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x55555555 +; CHECK-NEXT: and w9, w1, #0xaaaaaaaa +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 1431655765 + %my = and i32 %y, -1431655766 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x0, #0x5555555555555555 +; CHECK-NEXT: and x9, x1, #0xaaaaaaaaaaaaaaaa +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %mx = and i64 %x, 6148914691236517205 + %my = and i64 %y, -6148914691236517206 + %r = or i64 %mx, %my + ret i64 %r +} + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: mov w9, #85 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 85 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: mov w9, #21845 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 21845 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, #0x5555555555555555 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 6148914691236517205 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0x55555555 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0x55555555 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0x55555555 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0x55555555 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: and w8, w0, #0x55555555 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 1431655765 + %my = and i32 %y, -1431655765 ; instead of -1431655766 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} diff 
--git a/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll new file mode 100644 index 000000000000..9020151bf1cf --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [bit 3210] +; Y: [bit 7654] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #4 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i8 %x, 15 + %my = and i8 %y, -16 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3855 +; CHECK-NEXT: mov w9, #-3856 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i16 %x, 3855 + %my = and i16 %y, -3856 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xf0f0f0f +; CHECK-NEXT: and w9, w1, #0xf0f0f0f0 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 252645135 + %my = and i32 %y, -252645136 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x0, #0xf0f0f0f0f0f0f0f +; CHECK-NEXT: and x9, x1, #0xf0f0f0f0f0f0f0f0 +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %mx = and i64 %x, 1085102592571150095 + %my = and i64 %y, -1085102592571150096 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same 
as the previous one. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 15 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: mov w9, #3855 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 3855 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 1085102592571150095 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xf0f0f0f +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0xf0f0f0f +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0xf0f0f0f +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #61681 +; CHECK-NEXT: movk w9, #61680, lsl #16 +; CHECK-NEXT: and w8, w0, #0xf0f0f0f +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 252645135 + %my = and i32 %y, -252645135 ; instead of -252645136 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} diff --git 
a/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll new file mode 100644 index 000000000000..8e5ff65a9c19 --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [byte1][byte0] +; Y: [byte3][byte2] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #4 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i8 %x, 15 + %my = and i8 %y, -16 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #8 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i16 %x, 255 + %my = and i16 %y, -256 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #16 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i32 %x, 65535 + %my = and i32 %y, -65536 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil x1, x0, #0, #32 +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret + %mx = and i64 %x, 4294967295 + %my = and i64 %y, -4294967296 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 15 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xff +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 255 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 4294967295 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0xffff +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0xffff +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w9, w1, #0xffff0001 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 65535 + %my = and i32 %y, -65535 ; instead of -65536 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} diff --git a/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll b/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll new file mode 100644 index 000000000000..6cc4bf4ec596 --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -0,0 +1,632 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +define i8 @out8(i8 %x, i8 %y, i8 %mask) { +; CHECK-LABEL: out8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i8 %x, %mask + %notmask = xor i8 %mask, -1 + %my = and i8 %y, %notmask + %r = or i8 %mx, %my + ret i8 
%r +} + +define i16 @out16(i16 %x, i16 %y, i16 %mask) { +; CHECK-LABEL: out16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i16 %x, %mask + %notmask = xor i16 %mask, -1 + %my = and i16 %y, %notmask + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out32: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, %mask + %notmask = xor i32 %mask, -1 + %my = and i32 %y, %notmask + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64(i64 %x, i64 %y, i64 %mask) { +; CHECK-LABEL: out64: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x0, x2 +; CHECK-NEXT: bic x9, x1, x2 +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %mx = and i64 %x, %mask + %notmask = xor i64 %mask, -1 + %my = and i64 %y, %notmask + %r = or i64 %mx, %my + ret i64 %r +} +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8(i8 %x, i8 %y, i8 %mask) { +; CHECK-LABEL: in8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, %mask + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16(i16 %x, i16 %y, i16 %mask) { +; CHECK-LABEL: in16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, %mask + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in32: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64(i64 %x, i64 %y, i64 %mask) { +; CHECK-LABEL: in64: +; CHECK: // %bb.0: +; CHECK-NEXT: bic x8, x1, x2 +; CHECK-NEXT: and x9, x0, x2 +; CHECK-NEXT: orr x0, x9, x8 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, %mask + %r = xor i64 %n1, %y + ret i64 %r +} +; ============================================================================ ; +; Commutativity tests. 
+; ============================================================================ ; +define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_0_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_0_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} +define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_0_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} +define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_0_0: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} +define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} +define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 
+; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} +define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} +; ============================================================================ ; +; Y is an 'and' too. +; ============================================================================ ; +define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { +; CHECK-LABEL: in_complex_y0: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: and w9, w0, w3 +; CHECK-NEXT: bic w8, w8, w3 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { +; CHECK-LABEL: in_complex_y1: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: and w9, w0, w3 +; CHECK-NEXT: bic w8, w8, w3 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %y, %n1 + ret i32 %r +} +; ============================================================================ ; +; M is an 'xor' too. 
+; ============================================================================ ; +define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_m0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w2, w3 +; CHECK-NEXT: bic w9, w1, w8 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_m1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w2, w3 +; CHECK-NEXT: bic w9, w1, w8 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %n1, %y + ret i32 %r +} +; ============================================================================ ; +; Both Y and M are complex. +; ============================================================================ ; +define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y0_m0: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: bic w8, w8, w9 +; CHECK-NEXT: and w9, w0, w9 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y1_m0: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: bic w8, w8, w9 +; CHECK-NEXT: and w9, w0, w9 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %y, %n1 + ret i32 %r +} +define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, 
i32 %m_b) { +; CHECK-LABEL: in_complex_y0_m1: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: bic w8, w8, w9 +; CHECK-NEXT: and w9, w0, w9 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y1_m1: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: bic w8, w8, w9 +; CHECK-NEXT: and w9, w0, w9 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %y, %n1 + ret i32 %r +} +; ============================================================================ ; +; Various cases with %x and/or %y being a constant +; ============================================================================ ; +define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_varx_mone: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w2, w0 +; CHECK-NEXT: orn w0, w8, w2 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %mask, %x + %my = and i32 %notmask, -1 + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_varx_mone: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w2, w0 +; CHECK-NEXT: mvn w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, -1 ; %x + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, -1 + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. 
+define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_varx_mone_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %notmask, %x + %my = and i32 %mask, -1 + %r = or i32 %mx, %my + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. +define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_varx_mone_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: bic w8, w8, w2 +; CHECK-NEXT: mvn w0, w8 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %n0 = xor i32 %x, -1 ; %x + %n1 = and i32 %n0, %notmask + %r = xor i32 %n1, -1 + ret i32 %r +} +define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_varx_42: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: and w8, w2, w0 +; CHECK-NEXT: bic w9, w9, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %mask, %x + %my = and i32 %notmask, 42 + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_varx_42: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: bic w8, w8, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, 42 ; %x + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, 42 + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. 
+define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_varx_42_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w2, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %notmask, %x + %my = and i32 %mask, 42 + %r = or i32 %mx, %my + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. +define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_varx_42_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: and w8, w2, w8 +; CHECK-NEXT: bic w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %n0 = xor i32 %x, 42 ; %x + %n1 = and i32 %n0, %notmask + %r = xor i32 %n1, 42 + ret i32 %r +} +define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_mone_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: orr w0, w2, w8 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %mask, -1 + %my = and i32 %notmask, %y + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_mone_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: bic w8, w2, w1 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 -1, %y ; %x + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. +define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_mone_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w2, w1 +; CHECK-NEXT: orn w0, w8, w2 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %notmask, -1 + %my = and i32 %mask, %y + %r = or i32 %mx, %my + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. 
+define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_mone_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w1 +; CHECK-NEXT: bic w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %n0 = xor i32 -1, %y ; %x + %n1 = and i32 %n0, %notmask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_42_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: and w8, w2, w8 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %mask, 42 + %my = and i32 %notmask, %y + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_42_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w2, w9 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %n0 = xor i32 42, %y ; %x + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. +define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out_constant_42_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: bic w8, w8, w2 +; CHECK-NEXT: and w9, w2, w1 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %mx = and i32 %notmask, 42 + %my = and i32 %mask, %y + %r = or i32 %mx, %my + ret i32 %r +} +; This is not a canonical form. Testing for completeness only. 
+define i32 @in_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_constant_42_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: bic w9, w9, w2 +; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ret + %notmask = xor i32 %mask, -1 + %n0 = xor i32 42, %y ; %x + %n1 = and i32 %n0, %notmask + %r = xor i32 %n1, %y + ret i32 %r +} +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; +; Multi-use tests. +declare void @use32(i32) nounwind +define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind { +; CHECK-LABEL: in_multiuse_A: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, w3 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind { +; CHECK-LABEL: in_multiuse_B: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, w3 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} +; Various bad variants +define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) { +; CHECK-LABEL: n0_badmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w3 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, %mask + %notmask = xor i32 %mask2, -1 ; %mask2 instead of %mask + %my = and i32 %y, %notmask + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @n0_badxor(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: n0_badxor: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w9, w2, #0x1 +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, %mask + %notmask = xor i32 %mask, 1 ; instead of -1 + %my = and i32 %y, %notmask + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @n1_thirdvar(i32 %x, i32 %y, i32 %z, i32 %mask) { +; CHECK-LABEL: n1_thirdvar: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w3 +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} diff --git a/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll b/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll new file mode 100644 index 000000000000..ee150f1e5bdc --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll @@ -0,0 +1,227 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; ============================================================================ ; +; Various cases with %x and/or %y being a constant +; ============================================================================ ; + +define <4 x i32> @out_constant_varx_mone(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_varx_mone: +; CHECK: // %bb.0: +; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %mask, %x + %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1> + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +define <4 x i32> @in_constant_varx_mone(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_varx_mone: +; CHECK: // %bb.0: +; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x + %n1 = and <4 x i32> %n0, %mask + %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1> + ret <4 x i32> %r +} + +; This is not a canonical form. Testing for completeness only. +define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_varx_mone_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %notmask, %x + %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +; This is not a canonical form. Testing for completeness only. 
+define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_varx_mone_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x + %n1 = and <4 x i32> %n0, %notmask + %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1> + ret <4 x i32> %r +} + +define <4 x i32> @out_constant_varx_42(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_varx_42: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #42 +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %mask, %x + %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42> + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +define <4 x i32> @in_constant_varx_42(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_varx_42: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #42 +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x + %n1 = and <4 x i32> %n0, %mask + %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42> + ret <4 x i32> %r +} + +; This is not a canonical form. Testing for completeness only. 
+define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_varx_42_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #42 +; CHECK-NEXT: bsl v2.16b, v1.16b, v0.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %notmask, %x + %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42> + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +; This is not a canonical form. Testing for completeness only. +define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_varx_42_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #42 +; CHECK-NEXT: bsl v2.16b, v1.16b, v0.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x + %n1 = and <4 x i32> %n0, %notmask + %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42> + ret <4 x i32> %r +} + +define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_mone_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %my = and <4 x i32> %notmask, %y + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_mone_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x + %n1 = and <4 x i32> %n0, %mask + %r = xor <4 x i32> %n1, %y + ret <4 x i32> %r +} + +; This is not a canonical 
form. Testing for completeness only. +define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_mone_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and v0.16b, v2.16b, v1.16b +; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1> + %my = and <4 x i32> %mask, %y + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +; This is not a canonical form. Testing for completeness only. +define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_mone_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and v0.16b, v1.16b, v2.16b +; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x + %n1 = and <4 x i32> %n0, %notmask + %r = xor <4 x i32> %n1, %y + ret <4 x i32> %r +} + +define <4 x i32> @out_constant_42_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_42_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: movi v2.4s, #42 +; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42> + %my = and <4 x i32> %notmask, %y + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +define <4 x i32> @in_constant_42_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_42_vary: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: movi v2.4s, #42 +; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x + %n1 = and <4 x i32> %n0, %mask + %r = xor <4 x i32> %n1, %y + ret <4 x i32> %r +} + +; This is not a 
canonical form. Testing for completeness only. +define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: out_constant_42_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: movi v2.4s, #42 +; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42> + %my = and <4 x i32> %mask, %y + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +; This is not a canonical form. Testing for completeness only. +define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; CHECK-LABEL: in_constant_42_vary_invmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: movi v2.4s, #42 +; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b +; CHECK-NEXT: ret + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x + %n1 = and <4 x i32> %n0, %notmask + %r = xor <4 x i32> %n1, %y + ret <4 x i32> %r +} diff --git a/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll new file mode 100644 index 000000000000..df86540fdd96 --- /dev/null +++ b/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll @@ -0,0 +1,452 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; All the advanced stuff (negative tests, commutativity) is handled in the +; scalar version of the test only. 
+ +; ============================================================================ ; +; 8-bit vector width +; ============================================================================ ; + +define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { +; CHECK-LABEL: out_v1i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <1 x i8> %x, %mask + %notmask = xor <1 x i8> %mask, <i8 -1> + %my = and <1 x i8> %y, %notmask + %r = or <1 x i8> %mx, %my + ret <1 x i8> %r +} + +; ============================================================================ ; +; 16-bit vector width +; ============================================================================ ; + +define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { +; CHECK-LABEL: out_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi d3, #0x0000ff000000ff +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %mx = and <2 x i8> %x, %mask + %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1> + %my = and <2 x i8> %y, %notmask + %r = or <2 x i8> %mx, %my + ret <2 x i8> %r +} + +define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { +; CHECK-LABEL: out_v1i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <1 x i16> %x, %mask + %notmask = xor <1 x i16> %mask, <i16 -1> + %my = and <1 x i16> %y, %notmask + %r = or <1 x i16> %mx, %my + ret <1 x i16> %r +} + +; ============================================================================ ; +; 32-bit vector width +; ============================================================================ ; + +define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { +; CHECK-LABEL: out_v4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi d3, 
#0xff00ff00ff00ff +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %mx = and <4 x i8> %x, %mask + %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1> + %my = and <4 x i8> %y, %notmask + %r = or <4 x i8> %mx, %my + ret <4 x i8> %r +} + +define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { +; CHECK-LABEL: out_v4i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: movi d3, #0xff00ff00ff00ff +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %mx = and <4 x i8> %x, %mask + %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1> + %my = and <4 x i8> %y, %notmask + %r = or <4 x i8> %mx, %my + ret <4 x i8> %r +} + +define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { +; CHECK-LABEL: out_v2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: movi d3, #0x00ffff0000ffff +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %mx = and <2 x i16> %x, %mask + %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1> + %my = and <2 x i16> %y, %notmask + %r = or <2 x i16> %mx, %my + ret <2 x i16> %r +} + +define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { +; CHECK-LABEL: out_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <1 x i32> %x, %mask + %notmask = xor <1 x i32> %mask, <i32 -1> + %my = and <1 x i32> %y, %notmask + %r = or <1 x i32> %mx, %my + ret <1 x i32> %r +} + +; ============================================================================ ; +; 64-bit vector width +; ============================================================================ 
; + +define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { +; CHECK-LABEL: out_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <8 x i8> %x, %mask + %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %my = and <8 x i8> %y, %notmask + %r = or <8 x i8> %mx, %my + ret <8 x i8> %r +} + +define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { +; CHECK-LABEL: out_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <4 x i16> %x, %mask + %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1> + %my = and <4 x i16> %y, %notmask + %r = or <4 x i16> %mx, %my + ret <4 x i16> %r +} + +define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { +; CHECK-LABEL: out_v4i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <4 x i16> %x, %mask + %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1> + %my = and <4 x i16> %y, %notmask + %r = or <4 x i16> %mx, %my + ret <4 x i16> %r +} + +define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { +; CHECK-LABEL: out_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <2 x i32> %x, %mask + %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1> + %my = and <2 x i32> %y, %notmask + %r = or <2 x i32> %mx, %my + ret <2 x i32> %r +} + +define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { +; CHECK-LABEL: out_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <1 x i64> %x, %mask + %notmask = xor <1 x i64> %mask, <i64 -1> + %my = and <1 x i64> %y, %notmask + %r 
= or <1 x i64> %mx, %my + ret <1 x i64> %r +} + +; ============================================================================ ; +; 128-bit vector width +; ============================================================================ ; + +define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { +; CHECK-LABEL: out_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <16 x i8> %x, %mask + %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %my = and <16 x i8> %y, %notmask + %r = or <16 x i8> %mx, %my + ret <16 x i8> %r +} + +define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { +; CHECK-LABEL: out_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <8 x i16> %x, %mask + %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %my = and <8 x i16> %y, %notmask + %r = or <8 x i16> %mx, %my + ret <8 x i16> %r +} + +define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind { +; CHECK-LABEL: out_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <4 x i32> %x, %mask + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> + %my = and <4 x i32> %y, %notmask + %r = or <4 x i32> %mx, %my + ret <4 x i32> %r +} + +define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind { +; CHECK-LABEL: out_v4i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <4 x i32> %x, %mask + %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> + %my = and <4 x i32> %y, %notmask + %r = or <4 x i32> %mx, %my + 
ret <4 x i32> %r +} + +define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { +; CHECK-LABEL: out_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mx = and <2 x i64> %x, %mask + %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1> + %my = and <2 x i64> %y, %notmask + %r = or <2 x i64> %mx, %my + ret <2 x i64> %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; ============================================================================ ; +; 8-bit vector width +; ============================================================================ ; + +define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { +; CHECK-LABEL: in_v1i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <1 x i8> %x, %y + %n1 = and <1 x i8> %n0, %mask + %r = xor <1 x i8> %n1, %y + ret <1 x i8> %r +} + +; ============================================================================ ; +; 16-bit vector width +; ============================================================================ ; + +define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { +; CHECK-LABEL: in_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <2 x i8> %x, %y + %n1 = and <2 x i8> %n0, %mask + %r = xor <2 x i8> %n1, %y + ret <2 x i8> %r +} + +define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { +; CHECK-LABEL: in_v1i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <1 x i16> %x, %y + %n1 = and <1 x i16> %n0, %mask + %r = xor <1 x i16> %n1, %y + ret <1 x i16> %r +} + +; 
============================================================================ ; +; 32-bit vector width +; ============================================================================ ; + +define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { +; CHECK-LABEL: in_v4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i8> %x, %y + %n1 = and <4 x i8> %n0, %mask + %r = xor <4 x i8> %n1, %y + ret <4 x i8> %r +} + +define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { +; CHECK-LABEL: in_v2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <2 x i16> %x, %y + %n1 = and <2 x i16> %n0, %mask + %r = xor <2 x i16> %n1, %y + ret <2 x i16> %r +} + +define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { +; CHECK-LABEL: in_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <1 x i32> %x, %y + %n1 = and <1 x i32> %n0, %mask + %r = xor <1 x i32> %n1, %y + ret <1 x i32> %r +} + +; ============================================================================ ; +; 64-bit vector width +; ============================================================================ ; + +define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { +; CHECK-LABEL: in_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <8 x i8> %x, %y + %n1 = and <8 x i8> %n0, %mask + %r = xor <8 x i8> %n1, %y + ret <8 x i8> %r +} + +define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { +; CHECK-LABEL: in_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i16> %x, %y + %n1 = and <4 x i16> %n0, %mask + %r = xor 
<4 x i16> %n1, %y + ret <4 x i16> %r +} + +define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { +; CHECK-LABEL: in_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <2 x i32> %x, %y + %n1 = and <2 x i32> %n0, %mask + %r = xor <2 x i32> %n1, %y + ret <2 x i32> %r +} + +define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { +; CHECK-LABEL: in_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <1 x i64> %x, %y + %n1 = and <1 x i64> %n0, %mask + %r = xor <1 x i64> %n1, %y + ret <1 x i64> %r +} + +; ============================================================================ ; +; 128-bit vector width +; ============================================================================ ; + +define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { +; CHECK-LABEL: in_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <16 x i8> %x, %y + %n1 = and <16 x i8> %n0, %mask + %r = xor <16 x i8> %n1, %y + ret <16 x i8> %r +} + +define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { +; CHECK-LABEL: in_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <8 x i16> %x, %y + %n1 = and <8 x i16> %n0, %mask + %r = xor <8 x i16> %n1, %y + ret <8 x i16> %r +} + +define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind { +; CHECK-LABEL: in_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <4 x i32> %x, %y + %n1 = and <4 x i32> %n0, %mask + %r = xor <4 x i32> %n1, %y + ret <4 x i32> %r +} + +define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) 
nounwind { +; CHECK-LABEL: in_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %n0 = xor <2 x i64> %x, %y + %n1 = and <2 x i64> %n0, %mask + %r = xor <2 x i64> %n1, %y + ret <2 x i64> %r +} diff --git a/test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir b/test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir new file mode 100644 index 000000000000..5cf3bf99b1f2 --- /dev/null +++ b/test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir @@ -0,0 +1,38 @@ +#RUN: llc -mtriple=aarch64-- -run-pass prologepilog %s -o - | FileCheck %s +# Check that we use the frame pointer to address the emergency spill slot. +# Using the base pointer will result in an assert with "Emergency spill slot is +# out of reach". +--- +name: hugeStack +# CHECK-LABEL: name: hugeStack +tracksRegLiveness: true +frameInfo: + localFrameSize: 256 +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, name: '', size: 32761, alignment: 8 } +body: | + bb.0: + STRXui undef $x8, %stack.0, 0 + ; CHECK: STURXi undef $x8, $fp, -24 + B %bb.1 + bb.1: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr + RET_ReallyLR implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28 +... +--- +name: fpDoesNotFit +# CHECK-LABEL: name: fpDoesNotFit +tracksRegLiveness: true +frameInfo: + hasStackMap: true +# set to true to force hasFP to true. +stack: + - { id: 0, name: '', size: 4096, alignment: 8 } + - { id: 1, name: '', size: 32761, alignment: 8 } +body: | + bb.0: + STRXui undef $x8, %stack.0, -34 + ; Pick SP here. Picking FP will require scavenging a register. 
+ ; CHECK: STRXui undef $x8, $sp, 4062 + RET_ReallyLR diff --git a/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll b/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll new file mode 100644 index 000000000000..999958b3025d --- /dev/null +++ b/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll @@ -0,0 +1,31 @@ +; REQUIRES: arm-registered-target +; REQUIRES: asserts +; RUN: llc -o /dev/null %s -debug-only=legalize-types 2>&1 | FileCheck %s + +; This test checks that when v4f64 gets broken down to two v2f64 it maintains +; the "nnan" flags. + +; CHECK: Legalizing node: [[VFOUR:t.*]]: v4f64 = BUILD_VECTOR +; CHECK-NEXT: Analyzing result type: v4f64 +; CHECK-NEXT: Split node result: [[VFOUR]]: v4f64 = BUILD_VECTOR + +; CHECK: Legalizing node: [[VTWO:t.*]]: v2f64 = BUILD_VECTOR +; CHECK: Legally typed node: [[VTWO]]: v2f64 = BUILD_VECTOR +; CHECK: Legalizing node: t26: v2f64 = fmaxnum nnan reassoc [[VTWO]], [[VTWO]] + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + + +; Function Attrs: norecurse nounwind +define fastcc double @test() unnamed_addr #1 { +entry: + %0 = insertelement <4 x double> undef, double 1.0, i32 0 + %1 = insertelement <4 x double> %0, double 1.0, i32 1 + %2 = insertelement <4 x double> %1, double 1.0, i32 2 + %3 = insertelement <4 x double> %2, double 1.0, i32 3 + %4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %3) + ret double %4 +} + +declare double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double>) diff --git a/test/CodeGen/AArch64/win-alloca-no-stack-probe.ll b/test/CodeGen/AArch64/win-alloca-no-stack-probe.ll new file mode 100644 index 000000000000..0ab161f8f27a --- /dev/null +++ b/test/CodeGen/AArch64/win-alloca-no-stack-probe.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple aarch64-windows -verify-machineinstrs -filetype asm -o - %s | FileCheck %s + +define void @func(i64 %a) "no-stack-arg-probe" { +entry: + %0 = alloca i8, 
i64 %a, align 16 + call void @func2(i8* nonnull %0) + ret void +} + +declare void @func2(i8*) + +; CHECK: add [[REG1:x[0-9]+]], x0, #15 +; CHECK-NOT: bl __chkstk +; CHECK: mov [[REG2:x[0-9]+]], sp +; CHECK: and [[REG1]], [[REG1]], #0xfffffffffffffff0 +; CHECK: sub [[REG3:x[0-9]+]], [[REG2]], [[REG1]] +; CHECK: mov sp, [[REG3]] diff --git a/test/CodeGen/AArch64/win-alloca.ll b/test/CodeGen/AArch64/win-alloca.ll new file mode 100644 index 000000000000..f26b33e24a74 --- /dev/null +++ b/test/CodeGen/AArch64/win-alloca.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple aarch64-windows -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-OPT +; RUN: llc -mtriple aarch64-windows -verify-machineinstrs -filetype asm -o - %s -O0 | FileCheck %s + +define void @func(i64 %a) { +entry: + %0 = alloca i8, i64 %a, align 16 + call void @func2(i8* nonnull %0) + ret void +} + +declare void @func2(i8*) + +; The -O0 version here ends up much less elegant, so just check the +; details of the optimized form, but check that -O0 at least emits the +; call to __chkstk. 
+ +; CHECK: add [[REG1:x[0-9]+]], x0, #15 +; CHECK-OPT: lsr x15, [[REG1]], #4 +; CHECK: bl __chkstk +; CHECK: mov [[REG2:x[0-9]+]], sp +; CHECK-OPT: sub [[REG3:x[0-9]+]], [[REG2]], x15, lsl #4 +; CHECK-OPT: mov sp, [[REG3]] +; CHECK: bl func2 diff --git a/test/CodeGen/AArch64/win-tls.ll b/test/CodeGen/AArch64/win-tls.ll new file mode 100644 index 000000000000..ea49b99bbaae --- /dev/null +++ b/test/CodeGen/AArch64/win-tls.ll @@ -0,0 +1,64 @@ +; RUN: llc -mtriple aarch64-windows %s -o - | FileCheck %s + +@tlsVar = thread_local global i32 0 +@tlsVar8 = thread_local global i8 0 +@tlsVar64 = thread_local global i64 0 + +define i32 @getVar() { + %1 = load i32, i32* @tlsVar + ret i32 %1 +} + +define i32* @getPtr() { + ret i32* @tlsVar +} + +define void @setVar(i32 %val) { + store i32 %val, i32* @tlsVar + ret void +} + +define i8 @getVar8() { + %1 = load i8, i8* @tlsVar8 + ret i8 %1 +} + +define i64 @getVar64() { + %1 = load i64, i64* @tlsVar64 + ret i64 %1 +} + +; CHECK-LABEL: getVar +; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index +; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] + +; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3] +; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar +; CHECK: ldr w0, {{\[}}[[TLS]], :secrel_lo12:tlsVar{{\]}} + +; CHECK-LABEL: getPtr +; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index +; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] + +; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3] +; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar +; CHECK: add x0, [[TLS]], :secrel_lo12:tlsVar + +; CHECK-LABEL: setVar +; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index +; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], _tls_index] +; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88] + +; CHECK: ldr [[TLS:x[0-9]+]], 
{{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3] +; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar +; CHECK: str w0, {{\[}}[[TLS]], :secrel_lo12:tlsVar{{\]}} + +; CHECK-LABEL: getVar8 +; CHECK: add [[TLS:x[0-9]+]], [[TLS]], :secrel_hi12:tlsVar8 +; CHECK: ldrb w0, {{\[}}[[TLS]], :secrel_lo12:tlsVar8{{\]}} + +; CHECK-LABEL: getVar64 +; CHECK: add [[TLS:x[0-9]+]], [[TLS]], :secrel_hi12:tlsVar64 +; CHECK: ldr x0, {{\[}}[[TLS]], :secrel_lo12:tlsVar64{{\]}} diff --git a/test/CodeGen/AArch64/win64_vararg.ll b/test/CodeGen/AArch64/win64_vararg.ll index 6fcbfcb62dcf..9cc9f50adb77 100644 --- a/test/CodeGen/AArch64/win64_vararg.ll +++ b/test/CodeGen/AArch64/win64_vararg.ll @@ -159,27 +159,28 @@ attributes #6 = { "no-frame-pointer-elim"="true" } ; CHECK: stur x8, [x29, #-40] ; CHECK: mov w8, w0 ; CHECK: add x8, x8, #15 -; CHECK: mov x9, sp -; CHECK: and x8, x8, #0x1fffffff0 -; CHECK: sub x20, x9, x8 +; CHECK: lsr x15, x8, #4 ; CHECK: mov x19, x1 -; CHECK: mov x23, sp +; CHECK: mov [[REG2:x[0-9]+]], sp ; CHECK: stp x6, x7, [x29, #48] ; CHECK: stp x4, x5, [x29, #32] ; CHECK: stp x2, x3, [x29, #16] -; CHECK: mov sp, x20 -; CHECK: ldur x21, [x29, #-40] -; CHECK: sxtw x22, w0 +; CHECK: bl __chkstk +; CHECK: mov x8, sp +; CHECK: sub [[REG:x[0-9]+]], x8, x15, lsl #4 +; CHECK: mov sp, [[REG]] +; CHECK: ldur [[REG3:x[0-9]+]], [x29, #-40] +; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: mov x1, x20 -; CHECK: mov x2, x22 +; CHECK: mov x1, [[REG]] +; CHECK: mov x2, [[REG4]] ; CHECK: mov x3, x19 ; CHECK: orr x0, x8, #0x2 ; CHECK: mov x4, xzr -; CHECK: mov x5, x21 +; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf -; CHECK: mov sp, x23 +; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] ; CHECK: ldp x20, x19, [sp, #32] @@ -255,17 +256,15 @@ define i32 @snprintf(i8*, i64, i8*, ...) 
local_unnamed_addr #5 { ; CHECK-LABEL: fixed_params ; CHECK: sub sp, sp, #32 -; CHECK: mov w8, w3 -; CHECK: mov w9, w2 -; CHECK: mov w10, w1 +; CHECK-DAG: mov w6, w3 +; CHECK-DAG: mov [[REG1:w[0-9]+]], w2 +; CHECK: mov w2, w1 ; CHECK: str w4, [sp] ; CHECK: fmov x1, d0 ; CHECK: fmov x3, d1 ; CHECK: fmov x5, d2 ; CHECK: fmov x7, d3 -; CHECK: mov w2, w10 -; CHECK: mov w4, w9 -; CHECK: mov w6, w8 +; CHECK: mov w4, [[REG1]] ; CHECK: str x30, [sp, #16] ; CHECK: str d4, [sp, #8] ; CHECK: bl varargs diff --git a/test/CodeGen/AArch64/win_cst_pool.ll b/test/CodeGen/AArch64/win_cst_pool.ll new file mode 100644 index 000000000000..5bcc9194c796 --- /dev/null +++ b/test/CodeGen/AArch64/win_cst_pool.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=aarch64-win32-msvc | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s + +define double @double() { + ret double 0x0000000000800000 +} +; CHECK: .globl __real@0000000000800000 +; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800000 +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: __real@0000000000800000: +; CHECK-NEXT: .xword 8388608 +; CHECK: double: +; CHECK: adrp x8, __real@0000000000800000 +; CHECK-NEXT: ldr d0, [x8, __real@0000000000800000] +; CHECK-NEXT: ret + +; MINGW: .section .rdata,"dr" +; MINGW-NEXT: .p2align 3 +; MINGW-NEXT: [[LABEL:\.LC.*]]: +; MINGW-NEXT: .xword 8388608 +; MINGW: double: +; MINGW: adrp x8, [[LABEL]] +; MINGW-NEXT: ldr d0, [x8, [[LABEL]]] +; MINGW-NEXT: ret diff --git a/test/CodeGen/AArch64/zext-logic-shift-load.ll b/test/CodeGen/AArch64/zext-logic-shift-load.ll new file mode 100644 index 000000000000..a75862cfc3b3 --- /dev/null +++ b/test/CodeGen/AArch64/zext-logic-shift-load.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=aarch64-linux-gnu < %s -o - | FileCheck %s + +define i32 @test1(i8* %p) { +; CHECK: ldrb +; CHECK-NEXT: ubfx +; CHECK-NEXT: ret + + %1 = load i8, i8* %p + %2 = lshr i8 %1, 1 + %3 = and i8 %2, 1 + %4 = zext i8 %3 to i32 + ret i32 %4 +} + |
