diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2015-06-21 13:59:01 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-06-21 13:59:01 +0000 |
| commit | 3a0822f094b578157263e04114075ad7df81db41 (patch) | |
| tree | bc48361fe2cd1ca5f93ac01b38b183774468fc79 /test/CodeGen/AArch64 | |
| parent | 85d8b2bbe386bcfe669575d05b61482d7be07e5d (diff) | |
Notes
Diffstat (limited to 'test/CodeGen/AArch64')
| -rw-r--r-- | test/CodeGen/AArch64/arm64-atomic.ll | 16 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/arm64-big-endian-eh.ll | 8 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/arm64-ccmp.ll | 40 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll | 8 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/arm64-neon-2velem-high.ll | 457 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/arm64-stp.ll | 32 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/arm64-strict-align.ll | 1 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/br-to-eh-lpad.ll | 6 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/ifcvt-select.ll | 41 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/pic-eh-stubs.ll | 4 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/simple-macho.ll | 12 |
11 files changed, 461 insertions, 164 deletions
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll index 9136fb6271b5..0824bd881a95 100644 --- a/test/CodeGen/AArch64/arm64-atomic.ll +++ b/test/CodeGen/AArch64/arm64-atomic.ll @@ -14,6 +14,22 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { ret i32 %val } +define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { +; CHECK-LABEL: val_compare_and_swap_from_load: +; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2] +; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: cmp [[RESULT]], w1 +; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK-NEXT: [[LABEL2]]: + %new = load i32, i32* %pnew + %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire + %val = extractvalue { i32, i1 } %pair, 0 + ret i32 %val +} + define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_rel: ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: diff --git a/test/CodeGen/AArch64/arm64-big-endian-eh.ll b/test/CodeGen/AArch64/arm64-big-endian-eh.ll index a51703a8fc4b..77d52e32d3a0 100644 --- a/test/CodeGen/AArch64/arm64-big-endian-eh.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-eh.ll @@ -14,13 +14,13 @@ ; } ;} -define void @_Z4testii(i32 %a, i32 %b) #0 { +define void @_Z4testii(i32 %a, i32 %b) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @_Z3fooi(i32 %a) to label %try.cont unwind label %lpad lpad: ; preds = %entry - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* null %1 = extractvalue { i8*, i32 } %0, 0 %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2 @@ -35,7 +35,7 @@ try.cont: ; preds = %entry, %invoke.cont ret void lpad1: ; preds = %lpad - %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %3 = landingpad { i8*, i32 } cleanup invoke void @__cxa_end_catch() to label %eh.resume unwind label %terminate.lpad @@ -44,7 +44,7 @@ eh.resume: ; preds = %lpad1 resume { i8*, i32 } %3 terminate.lpad: ; preds = %lpad1 - %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %4 = landingpad { i8*, i32 } catch i8* null %5 = extractvalue { i8*, i32 } %4, 0 tail call void @__clang_call_terminate(i8* %5) #3 diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index 11228c7e8808..ff18f7364337 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -287,43 +287,3 @@ sw.bb.i.i: %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16 br label %sw.bb.i.i } - -; CHECK-LABEL: select_and -define i64 @select_and(i32 %v1, i32 %v2, i64 %a, i64 %b) { -; CHECK: cmp -; CHECK: ccmp{{.*}}, #0, ne -; CHECK: csel{{.*}}, lt - %1 = icmp slt i32 %v1, %v2 - %2 = icmp ne i32 5, %v2 - %3 = and i1 %1, %2 - %sel = select i1 %3, i64 %a, i64 %b - ret i64 %sel -} - -; CHECK-LABEL: select_or -define i64 @select_or(i32 %v1, i32 %v2, i64 %a, i64 %b) { -; CHECK: cmp -; CHECK: ccmp{{.*}}, #8, eq -; CHECK: csel{{.*}}, lt - %1 = icmp slt i32 %v1, %v2 - %2 = icmp ne i32 5, %v2 - %3 = or i1 %1, %2 - %sel = select i1 %3, i64 %a, i64 %b - ret i64 %sel -} - -; CHECK-LABEL: select_complicated -define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) { -; CHECK: fcmp -; CHECK: fccmp{{.*}}, #4, ne -; CHECK: fccmp{{.*}}, #1, ne -; CHECK: fccmp{{.*}}, #4, vc -; CEHCK: csel{{.*}}, eq - %1 = fcmp one double %v1, %v2 - %2 = fcmp oeq double %v2, 13.0 - %3 = fcmp oeq double %v1, 42.0 - %or0 = or i1 %2, %3 - %or1 = or i1 %1, %or0 - %sel = select i1 %or1, i16 %a, i16 %b - ret i16 %sel -} diff --git a/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll index f0b8299a66e3..c9f668f2c424 100644 --- a/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll +++ b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll @@ -24,7 +24,7 @@ false: } ; Check that we manage to form a zextload is an operation with only one -; argument to explicitly extend is in the the way. +; argument to explicitly extend is in the way. ; OPTALL-LABEL: @promoteOneArg ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p ; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32 @@ -49,7 +49,7 @@ false: } ; Check that we manage to form a sextload is an operation with only one -; argument to explicitly extend is in the the way. +; argument to explicitly extend is in the way. ; Version with sext. ; OPTALL-LABEL: @promoteOneArgSExt ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p @@ -74,7 +74,7 @@ false: } ; Check that we manage to form a zextload is an operation with two -; arguments to explicitly extend is in the the way. +; arguments to explicitly extend is in the way. ; Extending %add will create two extensions: ; 1. One for %b. ; 2. One for %t. @@ -113,7 +113,7 @@ false: } ; Check that we manage to form a sextload is an operation with two -; arguments to explicitly extend is in the the way. +; arguments to explicitly extend is in the way. ; Version with sext. ; OPTALL-LABEL: @promoteTwoArgSExt ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p diff --git a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll index 58df094d1922..3ff1e61d0298 100644 --- a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll @@ -1,270 +1,484 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \ +; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s -declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) - -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) - -declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) - -define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { +define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { ; CHECK-LABEL: test_vmull_high_n_s16: -; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 -; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vmull15.i.i } -define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { +define <4 x i32> @test_vmull_high_n_s16_imm(<8 x i16> %a) #0 { +; CHECK-LABEL: test_vmull_high_n_s16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) + ret <4 x i32> %vmull15.i.i +} + +define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { ; CHECK-LABEL: test_vmull_high_n_s32: -; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 -; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vmull9.i.i +} + +define <2 x i64> @test_vmull_high_n_s32_imm(<4 x i32> %a) #0 { +; CHECK-LABEL: test_vmull_high_n_s32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8 +; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 511, i32 511>) ret <2 x i64> %vmull9.i.i } -define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { +define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 { ; CHECK-LABEL: test_vmull_high_n_u16: -; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 -; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vmull15.i.i } -define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { +define <4 x i32> @test_vmull_high_n_u16_imm(<8 x i16> %a) #0 { +; CHECK-LABEL: test_vmull_high_n_u16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8 +; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 4352, i16 4352, i16 4352, i16 4352>) + ret <4 x i32> %vmull15.i.i +} + +define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 { ; CHECK-LABEL: test_vmull_high_n_u32: -; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 -; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vmull9.i.i } -define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { +define <2 x i64> @test_vmull_high_n_u32_imm(<4 x i32> %a) #0 { +; CHECK-LABEL: test_vmull_high_n_u32_imm: +; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #0x1, msl #8 +; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 4294966784, i32 4294966784>) + ret <2 x i64> %vmull9.i.i +} + +define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { ; CHECK-LABEL: test_vqdmull_high_n_s16: -; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 -; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vqdmull15.i.i +} + +define <4 x i32> @test_vqdmull_high_n_s16_imm(<8 x i16> %a) #0 { +; CHECK-LABEL: test_vqdmull_high_n_s16_imm: +; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #0x11, lsl #8 +; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 61183, i16 61183, i16 61183, i16 61183>) ret <4 x i32> %vqdmull15.i.i } -define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { +define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { ; CHECK-LABEL: test_vqdmull_high_n_s32: -; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 -; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vqdmull9.i.i +} + +define <2 x i64> @test_vqdmull_high_n_s32_imm(<4 x i32> %a) #0 { +; CHECK-LABEL: test_vqdmull_high_n_s32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) ret <2 x i64> %vqdmull9.i.i } -define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { ; CHECK-LABEL: test_vmlal_high_n_s16: -; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 -; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <4 x i32> @test_vmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK-LABEL: test_vmlal_high_n_s16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } -define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { ; CHECK-LABEL: test_vmlal_high_n_s32: -; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 -; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } -define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +define <2 x i64> @test_vmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: test_vmlal_high_n_s32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { ; CHECK-LABEL: test_vmlal_high_n_u16: -; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0 -; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <4 x i32> @test_vmlal_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK-LABEL: test_vmlal_high_n_u16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } -define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { ; CHECK-LABEL: test_vmlal_high_n_u32: -; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0 -; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } -define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +define <2 x i64> @test_vmlal_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: test_vmlal_high_n_u32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { ; CHECK-LABEL: test_vqdmlal_high_n_s16: -; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) + %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) + ret <4 x i32> %vqdmlal17.i.i +} + +define <4 x i32> @test_vqdmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK-LABEL: test_vqdmlal_high_n_s16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) + %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) ret <4 x i32> %vqdmlal17.i.i } -define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { ; CHECK-LABEL: test_vqdmlal_high_n_s32: -; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) + %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) + ret <2 x i64> %vqdmlal11.i.i +} + +define <2 x i64> @test_vqdmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: test_vqdmlal_high_n_s32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) + %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) ret <2 x i64> %vqdmlal11.i.i } -define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { ; CHECK-LABEL: test_vmlsl_high_n_s16: -; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK-LABEL: test_vmlsl_high_n_s16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } -define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { ; CHECK-LABEL: test_vmlsl_high_n_s32: -; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } -define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +define <2 x i64> @test_vmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: test_vmlsl_high_n_s32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { ; CHECK-LABEL: test_vmlsl_high_n_u16: -; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK-LABEL: test_vmlsl_high_n_u16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } -define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { ; CHECK-LABEL: test_vmlsl_high_n_u32: -; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } -define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +define <2 x i64> @test_vmlsl_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: test_vmlsl_high_n_u32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { ; CHECK-LABEL: test_vqdmlsl_high_n_s16: -; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0 +; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) + %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) + ret <4 x i32> %vqdmlsl17.i.i +} + +define <4 x i32> @test_vqdmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #0x1d +; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>) + %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) ret <4 x i32> %vqdmlsl17.i.i } -define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { ; CHECK-LABEL: test_vqdmlsl_high_n_s32: -; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0 +; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) + %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) + ret <2 x i64> %vqdmlsl11.i.i +} + +define <2 x i64> @test_vqdmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm: +; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #0x1d +; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s +; CHECK-NEXT: ret +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>) + %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) ret <2 x i64> %vqdmlsl11.i.i } -define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { +define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 { ; CHECK-LABEL: test_vmul_n_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 @@ -272,9 +486,10 @@ entry: ret <2 x float> %mul.i } -define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { +define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 { ; CHECK-LABEL: test_vmulq_n_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 @@ -284,9 +499,10 @@ entry: ret <4 x float> %mul.i } -define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { +define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 { ; CHECK-LABEL: test_vmulq_n_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 @@ -294,48 +510,67 @@ entry: ret <2 x double> %mul.i } -define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { +define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { ; CHECK-LABEL: test_vfma_n_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) + %0 = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) ret <2 x float> %0 } -define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { +define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { ; CHECK-LABEL: test_vfmaq_n_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) + %0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) ret <4 x float> %0 } -define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { +define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { ; CHECK-LABEL: test_vfms_n_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b - %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) + %1 = call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) ret <2 x float> %1 } -define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { +define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { ; CHECK-LABEL: test_vfmsq_n_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK-NEXT: ret entry: %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b - %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) + %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) ret <4 x float> %1 } + +attributes #0 = { nounwind } + +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll index 4d76396471ad..72561aac6e87 100644 --- a/test/CodeGen/AArch64/arm64-stp.ll +++ b/test/CodeGen/AArch64/arm64-stp.ll @@ -99,3 +99,35 @@ entry: store <4 x i32> %p20, <4 x i32>* %p21, align 4 ret void } + +; Read of %b to compute %tmp2 shouldn't prevent formation of stp +; CHECK-LABEL: stp_int_rar_hazard +; CHECK: stp w0, w1, [x2] +; CHECK: ldr [[REG:w[0-9]+]], [x2, #8] +; CHECK: add w0, [[REG]], w1 +; CHECK: ret +define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind { + store i32 %a, i32* %p, align 4 + %ld.ptr = getelementptr inbounds i32, i32* %p, i64 2 + %tmp = load i32, i32* %ld.ptr, align 4 + %tmp2 = add i32 %tmp, %b + %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 + store i32 %b, i32* %add.ptr, align 4 + ret i32 %tmp2 +} + +; Read of %b to compute %tmp2 shouldn't prevent formation of stp +; CHECK-LABEL: stp_int_rar_hazard_after +; CHECK: ldr [[REG:w[0-9]+]], [x3, #4] +; CHECK: add w0, [[REG]], w2 +; CHECK: stp w1, w2, [x3] +; CHECK: ret +define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind { + store i32 %a, i32* %p, align 4 + %ld.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %tmp = load i32, i32* %ld.ptr, align 4 + %tmp2 = add i32 %tmp, %b + %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 + store i32 %b, i32* %add.ptr, align 4 + ret i32 %tmp2 +} diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll index b707527f3c0c..109f4115d801 100644 --- a/test/CodeGen/AArch64/arm64-strict-align.ll +++ b/test/CodeGen/AArch64/arm64-strict-align.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s ; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT +; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT define i32 @f0(i32* nocapture %p) nounwind { ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2] diff --git a/test/CodeGen/AArch64/br-to-eh-lpad.ll b/test/CodeGen/AArch64/br-to-eh-lpad.ll index f304ba4ca286..2ac9e9043339 100644 --- a/test/CodeGen/AArch64/br-to-eh-lpad.ll +++ b/test/CodeGen/AArch64/br-to-eh-lpad.ll @@ -7,12 +7,12 @@ ; that case, the machine verifier, which relies on analyzing branches for this ; kind of verification, is unable to check anything, so accepts the CFG. -define void @test_branch_to_landingpad() { +define void @test_branch_to_landingpad() personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) { entry: br i1 undef, label %if.end50.thread, label %if.then6 lpad: - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @"OBJC_EHTYPE_$_NSString" catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @OBJC_EHTYPE_id catch i8* null @@ -46,7 +46,7 @@ invoke.cont43: unreachable lpad40: - %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + %1 = landingpad { i8*, i32 } catch i8* null br label %finally.catchall diff --git a/test/CodeGen/AArch64/ifcvt-select.ll b/test/CodeGen/AArch64/ifcvt-select.ll new file mode 100644 index 000000000000..4e024d963f20 --- /dev/null +++ b/test/CodeGen/AArch64/ifcvt-select.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s +; Do not generate redundant select in early if-converstion pass. + +define i32 @foo(i32 %a, i32 %b) { +entry: +;CHECK-LABEL: foo: +;CHECK: csinc +;CHECK-NOT: csel + %sub = sub nsw i32 %b, %a + %cmp10 = icmp sgt i32 %a, 0 + br i1 %cmp10, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: + br label %while.body + +while.body: + %j.012 = phi i32 [ %sub, %while.body.lr.ph ], [ %inc, %if.then ], [ %inc, %if.else ] + %i.011 = phi i32 [ %a, %while.body.lr.ph ], [ %inc2, %if.then ], [ %dec, %if.else ] + %cmp1 = icmp slt i32 %i.011, %j.012 + br i1 %cmp1, label %while.end, label %while.cond + +while.cond: + %inc = add nsw i32 %j.012, 5 + %cmp2 = icmp slt i32 %inc, %b + br i1 %cmp2, label %if.then, label %if.else + +if.then: + %inc2 = add nsw i32 %i.011, 1 + br label %while.body + +if.else: + %dec = add nsw i32 %i.011, -1 + br label %while.body + +while.end: + %j.0.lcssa = phi i32 [ %j.012, %while.body ], [ %sub, %entry ] + %i.0.lcssa = phi i32 [ %i.011, %while.body ], [ %a, %entry ] + %add = add nsw i32 %j.0.lcssa, %i.0.lcssa + ret i32 %add +} + diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index f761a87783ce..143558f7b2c7 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -21,13 +21,13 @@ @_ZTIi = external constant i8* -define i32 @_Z3barv() { +define i32 @_Z3barv() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @_Z3foov() to label %return unwind label %lpad lpad: ; preds = %entry - %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + %0 = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIi to i8*) %1 = extractvalue { i8*, i32 } %0, 1 %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind diff --git a/test/CodeGen/AArch64/simple-macho.ll b/test/CodeGen/AArch64/simple-macho.ll new file mode 100644 index 000000000000..e9dd98e230db --- /dev/null +++ b/test/CodeGen/AArch64/simple-macho.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=arm64-macho -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-macho -filetype=obj -o %t %s +; RUN: llvm-objdump -triple=arm64-macho -d %t | FileCheck --check-prefix=CHECK-OBJ %s + +define void @foo() { +; CHECK-LABEL: _foo: +; CHECK: ret + +; CHECK-OBJ: 0: c0 03 5f d6 ret + + ret void +}
\ No newline at end of file |
