diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:17:04 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:17:04 +0000 |
commit | b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch) | |
tree | 98b8f811c7aff2547cab8642daf372d6c59502fb /test/CodeGen/X86/avx2-vbroadcast.ll | |
parent | 6421cca32f69ac849537a3cff78c352195e99f1b (diff) | |
download | src-test-b915e9e0fc85ba6f398b3fab0db6a81a8913af94.tar.gz src-test-b915e9e0fc85ba6f398b3fab0db6a81a8913af94.zip |
Notes
Diffstat (limited to 'test/CodeGen/X86/avx2-vbroadcast.ll')
-rw-r--r-- | test/CodeGen/X86/avx2-vbroadcast.ll | 785 |
1 files changed, 716 insertions, 69 deletions
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 2ecf2fa5a6e7d..9b4d776b29e33 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp { ; X32-LABEL: BB16: @@ -207,22 +209,34 @@ entry: } define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp { -; X32-LABEL: QQ64: -; X32: ## BB#0: ## %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl (%eax), %ecx -; X32-NEXT: movl 4(%eax), %eax -; X32-NEXT: vmovd %ecx, %xmm0 -; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: retl +; X32-AVX2-LABEL: QQ64: +; X32-AVX2: ## BB#0: ## %entry +; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX2-NEXT: movl (%eax), %ecx +; X32-AVX2-NEXT: movl 4(%eax), %eax +; X32-AVX2-NEXT: vmovd %ecx, %xmm0 +; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X32-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX2-NEXT: retl ; ; X64-LABEL: QQ64: ; X64: ## BB#0: ## %entry ; X64-NEXT: vbroadcastsd (%rdi), %ymm0 ; X64-NEXT: retq +; +; X32-AVX512VL-LABEL: QQ64: +; X32-AVX512VL: ## BB#0: ## %entry +; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512VL-NEXT: movl (%eax), %ecx +; X32-AVX512VL-NEXT: movl 4(%eax), %eax +; X32-AVX512VL-NEXT: vmovd %ecx, %xmm0 +; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: retl entry: %q = load i64, i64* %ptr, align 4 %q0 = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -232,6 +246,74 @@ entry: ret <4 x i64> %q3 } +define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) { +; X32-AVX2-LABEL: broadcast_mem_v4i16_v8i16: +; X32-AVX2: ## BB#0: +; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: broadcast_mem_v4i16_v8i16: +; X64-AVX2: ## BB#0: +; X64-AVX2-NEXT: vpbroadcastq (%rdi), %xmm0 +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16: +; X32-AVX512VL: ## BB#0: +; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X32-AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13] +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16: +; X64-AVX512VL: ## BB#0: +; X64-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13] +; X64-AVX512VL-NEXT: retq + %load = load <4 x i16>, <4 x i16>* %ptr + %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <8 x i16> %shuf +} + +define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) { +; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16: +; X32-AVX2: ## BB#0: +; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; X32-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,4,5,6,7,6,7],zero,zero +; X32-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16: +; X64-AVX2: ## BB#0: +; X64-AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; X64-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,4,5,6,7,6,7],zero,zero +; X64-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: +; X32-AVX512VL: ## BB#0: +; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X32-AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; X32-AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; X32-AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X32-AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0 +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: +; X64-AVX512VL: ## BB#0: +; X64-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; X64-AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; X64-AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0 +; X64-AVX512VL-NEXT: retq + %load = load <4 x i16>, <4 x i16>* %ptr + %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <16 x i16> %shuf +} + ; FIXME: Pointer adjusted broadcasts define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp { @@ -561,34 +643,54 @@ entry: } define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp { -; X32-LABEL: V111: -; X32: ## BB#0: ## %entry -; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1 -; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; X32-NEXT: retl +; X32-AVX2-LABEL: V111: +; X32-AVX2: ## BB#0: ## %entry +; X32-AVX2-NEXT: vpbroadcastd LCPI29_0, %ymm1 +; X32-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; X32-AVX2-NEXT: retl ; -; X64-LABEL: V111: -; X64: ## BB#0: ## %entry -; X64-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; X64-NEXT: retq +; X64-AVX2-LABEL: V111: +; X64-AVX2: ## BB#0: ## %entry +; X64-AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: V111: +; X32-AVX512VL: ## BB#0: ## %entry +; X32-AVX512VL-NEXT: vpaddd LCPI29_0{1to8}, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: V111: +; X64-AVX512VL: ## BB#0: ## %entry +; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: retq entry: %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ret <8 x i32> %g } define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp { -; X32-LABEL: V113: -; X32: ## BB#0: ## %entry -; X32-NEXT: vbroadcastss LCPI28_0, %ymm1 -; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0 -; X32-NEXT: retl +; X32-AVX2-LABEL: V113: +; X32-AVX2: ## BB#0: ## %entry +; X32-AVX2-NEXT: vbroadcastss LCPI30_0, %ymm1 +; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; X32-AVX2-NEXT: retl ; -; X64-LABEL: V113: -; X64: ## BB#0: ## %entry -; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 -; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 -; X64-NEXT: retq +; X64-AVX2-LABEL: V113: +; X64-AVX2: ## BB#0: ## %entry +; X64-AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; X64-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: V113: +; X32-AVX512VL: ## BB#0: ## %entry +; X32-AVX512VL-NEXT: vaddps LCPI30_0{1to8}, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: V113: +; X64-AVX512VL: ## BB#0: ## %entry +; X64-AVX512VL-NEXT: vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: retq entry: %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000> ret <8 x float> %g @@ -597,7 +699,7 @@ entry: define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { ; X32-LABEL: _e2: ; X32: ## BB#0: -; X32-NEXT: vbroadcastss LCPI29_0, %xmm0 +; X32-NEXT: vbroadcastss LCPI31_0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: _e2: @@ -637,25 +739,25 @@ define void @crash() nounwind alwaysinline { ; X32: ## BB#0: ## %WGLoopsEntry ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: testb %al, %al -; X32-NEXT: je LBB31_1 +; X32-NEXT: je LBB33_1 ; X32-NEXT: ## BB#2: ## %ret ; X32-NEXT: retl ; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: LBB31_1: ## %footer349VF +; X32-NEXT: LBB33_1: ## %footer349VF ; X32-NEXT: ## =>This Inner Loop Header: Depth=1 -; X32-NEXT: jmp LBB31_1 +; X32-NEXT: jmp LBB33_1 ; ; X64-LABEL: crash: ; X64: ## BB#0: ## %WGLoopsEntry ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: testb %al, %al -; X64-NEXT: je LBB31_1 +; X64-NEXT: je LBB33_1 ; X64-NEXT: ## BB#2: ## %ret ; X64-NEXT: retq ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: LBB31_1: ## %footer349VF +; X64-NEXT: LBB33_1: ## %footer349VF ; X64-NEXT: ## =>This Inner Loop Header: Depth=1 -; X64-NEXT: jmp LBB31_1 +; X64-NEXT: jmp LBB33_1 WGLoopsEntry: br i1 undef, label %ret, label %footer329VF @@ -688,11 +790,16 @@ define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp { ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 ; X32-NEXT: retl ; -; X64-LABEL: _inreg0: -; X64: ## BB#0: -; X64-NEXT: vmovd %edi, %xmm0 -; X64-NEXT: vbroadcastss %xmm0, %ymm0 -; X64-NEXT: retq +; X64-AVX2-LABEL: _inreg0: +; X64-AVX2: ## BB#0: +; X64-AVX2-NEXT: vmovd %edi, %xmm0 +; X64-AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 +; X64-AVX2-NEXT: retq +; +; X64-AVX512VL-LABEL: _inreg0: +; X64-AVX512VL: ## BB#0: +; X64-AVX512VL-NEXT: vpbroadcastd %edi, %ymm0 +; X64-AVX512VL-NEXT: retq %in = insertelement <8 x i32> undef, i32 %scalar, i32 0 %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer ret <8 x i32> %wide @@ -997,10 +1104,56 @@ define <4 x double> @splat_concat4(double %d) { ; load will not create a cycle in the DAG. ; Those test cases exerce the latter. -; CHECK-LABEL: isel_crash_16b -; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}} -; CHECK: ret define void @isel_crash_16b(i8* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_16b: +; X32-AVX2: ## BB#0: ## %eintry +; X32-AVX2-NEXT: subl $60, %esp +; X32-AVX2-NEXT: Lcfi0: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 64 +; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX2-NEXT: vmovaps %xmm0, (%esp) +; X32-AVX2-NEXT: vpbroadcastb (%eax), %xmm1 +; X32-AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: addl $60, %esp +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_16b: +; X64-AVX2: ## BB#0: ## %eintry +; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movb (%rdi), %al +; X64-AVX2-NEXT: vmovd %eax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_16b: +; X32-AVX512VL: ## BB#0: ## %eintry +; X32-AVX512VL-NEXT: subl $60, %esp +; X32-AVX512VL-NEXT: Lcfi0: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 64 +; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX512VL-NEXT: vmovaps %xmm0, (%esp) +; X32-AVX512VL-NEXT: vpbroadcastb (%eax), %xmm1 +; X32-AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: addl $60, %esp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_16b: +; X64-AVX512VL: ## BB#0: ## %eintry +; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movb (%rdi), %al +; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: retq eintry: %__a.addr.i = alloca <2 x i64>, align 16 %__b.addr.i = alloca <2 x i64>, align 16 @@ -1016,10 +1169,98 @@ eintry: ret void } -; CHECK-LABEL: isel_crash_32b -; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}} -; CHECK: ret define void @isel_crash_32b(i8* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_32b: +; X32-AVX2: ## BB#0: ## %eintry +; X32-AVX2-NEXT: pushl %ebp +; X32-AVX2-NEXT: Lcfi1: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX2-NEXT: Lcfi2: +; X32-AVX2-NEXT: .cfi_offset %ebp, -8 +; X32-AVX2-NEXT: movl %esp, %ebp +; X32-AVX2-NEXT: Lcfi3: +; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX2-NEXT: andl $-32, %esp +; X32-AVX2-NEXT: subl $128, %esp +; X32-AVX2-NEXT: movl 8(%ebp), %eax +; X32-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX2-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX2-NEXT: vpbroadcastb (%eax), %ymm1 +; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: movl %ebp, %esp +; X32-AVX2-NEXT: popl %ebp +; X32-AVX2-NEXT: vzeroupper +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_32b: +; X64-AVX2: ## BB#0: ## %eintry +; X64-AVX2-NEXT: pushq %rbp +; X64-AVX2-NEXT: Lcfi0: +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: Lcfi1: +; X64-AVX2-NEXT: .cfi_offset %rbp, -16 +; X64-AVX2-NEXT: movq %rsp, %rbp +; X64-AVX2-NEXT: Lcfi2: +; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX2-NEXT: andq $-32, %rsp +; X64-AVX2-NEXT: subq $128, %rsp +; X64-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX2-NEXT: movb (%rdi), %al +; X64-AVX2-NEXT: vmovd %eax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 +; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movq %rbp, %rsp +; X64-AVX2-NEXT: popq %rbp +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_32b: +; X32-AVX512VL: ## BB#0: ## %eintry +; X32-AVX512VL-NEXT: pushl %ebp +; X32-AVX512VL-NEXT: Lcfi1: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX512VL-NEXT: Lcfi2: +; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8 +; X32-AVX512VL-NEXT: movl %esp, %ebp +; X32-AVX512VL-NEXT: Lcfi3: +; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX512VL-NEXT: andl $-32, %esp +; X32-AVX512VL-NEXT: subl $128, %esp +; X32-AVX512VL-NEXT: movl 8(%ebp), %eax +; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX512VL-NEXT: vpbroadcastb (%eax), %ymm1 +; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: movl %ebp, %esp +; X32-AVX512VL-NEXT: popl %ebp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_32b: +; X64-AVX512VL: ## BB#0: ## %eintry +; X64-AVX512VL-NEXT: pushq %rbp +; X64-AVX512VL-NEXT: Lcfi0: +; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX512VL-NEXT: Lcfi1: +; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16 +; X64-AVX512VL-NEXT: movq %rsp, %rbp +; X64-AVX512VL-NEXT: Lcfi2: +; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX512VL-NEXT: andq $-32, %rsp +; X64-AVX512VL-NEXT: subq $128, %rsp +; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX512VL-NEXT: movb (%rdi), %al +; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 +; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movq %rbp, %rsp +; X64-AVX512VL-NEXT: popq %rbp +; X64-AVX512VL-NEXT: retq eintry: %__a.addr.i = alloca <4 x i64>, align 16 %__b.addr.i = alloca <4 x i64>, align 16 @@ -1035,10 +1276,56 @@ eintry: ret void } -; CHECK-LABEL: isel_crash_8w -; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}} -; CHECK: ret define void @isel_crash_8w(i16* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_8w: +; X32-AVX2: ## BB#0: ## %entry +; X32-AVX2-NEXT: subl $60, %esp +; X32-AVX2-NEXT: Lcfi4: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 64 +; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX2-NEXT: vmovaps %xmm0, (%esp) +; X32-AVX2-NEXT: vpbroadcastw (%eax), %xmm1 +; X32-AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: addl $60, %esp +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_8w: +; X64-AVX2: ## BB#0: ## %entry +; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movw (%rdi), %ax +; X64-AVX2-NEXT: vmovd %eax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_8w: +; X32-AVX512VL: ## BB#0: ## %entry +; X32-AVX512VL-NEXT: subl $60, %esp +; X32-AVX512VL-NEXT: Lcfi4: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 64 +; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX512VL-NEXT: vmovaps %xmm0, (%esp) +; X32-AVX512VL-NEXT: vpbroadcastw (%eax), %xmm1 +; X32-AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: addl $60, %esp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_8w: +; X64-AVX512VL: ## BB#0: ## %entry +; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movw (%rdi), %ax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: retq entry: %__a.addr.i = alloca <2 x i64>, align 16 %__b.addr.i = alloca <2 x i64>, align 16 @@ -1054,10 +1341,98 @@ entry: ret void } -; CHECK-LABEL: isel_crash_16w -; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}} -; CHECK: ret define void @isel_crash_16w(i16* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_16w: +; X32-AVX2: ## BB#0: ## %eintry +; X32-AVX2-NEXT: pushl %ebp +; X32-AVX2-NEXT: Lcfi5: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX2-NEXT: Lcfi6: +; X32-AVX2-NEXT: .cfi_offset %ebp, -8 +; X32-AVX2-NEXT: movl %esp, %ebp +; X32-AVX2-NEXT: Lcfi7: +; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX2-NEXT: andl $-32, %esp +; X32-AVX2-NEXT: subl $128, %esp +; X32-AVX2-NEXT: movl 8(%ebp), %eax +; X32-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX2-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX2-NEXT: vpbroadcastw (%eax), %ymm1 +; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: movl %ebp, %esp +; X32-AVX2-NEXT: popl %ebp +; X32-AVX2-NEXT: vzeroupper +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_16w: +; X64-AVX2: ## BB#0: ## %eintry +; X64-AVX2-NEXT: pushq %rbp +; X64-AVX2-NEXT: Lcfi3: +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: Lcfi4: +; X64-AVX2-NEXT: .cfi_offset %rbp, -16 +; X64-AVX2-NEXT: movq %rsp, %rbp +; X64-AVX2-NEXT: Lcfi5: +; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX2-NEXT: andq $-32, %rsp +; X64-AVX2-NEXT: subq $128, %rsp +; X64-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX2-NEXT: movw (%rdi), %ax +; X64-AVX2-NEXT: vmovd %eax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastw %xmm1, %ymm1 +; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movq %rbp, %rsp +; X64-AVX2-NEXT: popq %rbp +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_16w: +; X32-AVX512VL: ## BB#0: ## %eintry +; X32-AVX512VL-NEXT: pushl %ebp +; X32-AVX512VL-NEXT: Lcfi5: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX512VL-NEXT: Lcfi6: +; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8 +; X32-AVX512VL-NEXT: movl %esp, %ebp +; X32-AVX512VL-NEXT: Lcfi7: +; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX512VL-NEXT: andl $-32, %esp +; X32-AVX512VL-NEXT: subl $128, %esp +; X32-AVX512VL-NEXT: movl 8(%ebp), %eax +; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX512VL-NEXT: vpbroadcastw (%eax), %ymm1 +; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: movl %ebp, %esp +; X32-AVX512VL-NEXT: popl %ebp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_16w: +; X64-AVX512VL: ## BB#0: ## %eintry +; X64-AVX512VL-NEXT: pushq %rbp +; X64-AVX512VL-NEXT: Lcfi3: +; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX512VL-NEXT: Lcfi4: +; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16 +; X64-AVX512VL-NEXT: movq %rsp, %rbp +; X64-AVX512VL-NEXT: Lcfi5: +; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX512VL-NEXT: andq $-32, %rsp +; X64-AVX512VL-NEXT: subq $128, %rsp +; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX512VL-NEXT: movw (%rdi), %ax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm1 +; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movq %rbp, %rsp +; X64-AVX512VL-NEXT: popq %rbp +; X64-AVX512VL-NEXT: retq eintry: %__a.addr.i = alloca <4 x i64>, align 16 %__b.addr.i = alloca <4 x i64>, align 16 @@ -1073,10 +1448,41 @@ eintry: ret void } -; CHECK-LABEL: isel_crash_4d -; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}} -; CHECK: ret define void @isel_crash_4d(i32* %cV_R.addr) { +; X32-LABEL: isel_crash_4d: +; X32: ## BB#0: ## %entry +; X32-NEXT: subl $60, %esp +; X32-NEXT: Lcfi8: +; X32-NEXT: .cfi_def_cfa_offset 64 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-NEXT: vmovaps %xmm0, (%esp) +; X32-NEXT: vbroadcastss (%eax), %xmm1 +; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: addl $60, %esp +; X32-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_4d: +; X64-AVX2: ## BB#0: ## %entry +; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movl (%rdi), %eax +; X64-AVX2-NEXT: vmovd %eax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: retq +; +; X64-AVX512VL-LABEL: isel_crash_4d: +; X64-AVX512VL: ## BB#0: ## %entry +; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movl (%rdi), %eax +; X64-AVX512VL-NEXT: vpbroadcastd %eax, %xmm1 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: retq entry: %__a.addr.i = alloca <2 x i64>, align 16 %__b.addr.i = alloca <2 x i64>, align 16 @@ -1092,10 +1498,97 @@ entry: ret void } -; CHECK-LABEL: isel_crash_8d -; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}} -; CHECK: ret define void @isel_crash_8d(i32* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_8d: +; X32-AVX2: ## BB#0: ## %eintry +; X32-AVX2-NEXT: pushl %ebp +; X32-AVX2-NEXT: Lcfi9: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX2-NEXT: Lcfi10: +; X32-AVX2-NEXT: .cfi_offset %ebp, -8 +; X32-AVX2-NEXT: movl %esp, %ebp +; X32-AVX2-NEXT: Lcfi11: +; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX2-NEXT: andl $-32, %esp +; X32-AVX2-NEXT: subl $128, %esp +; X32-AVX2-NEXT: movl 8(%ebp), %eax +; X32-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX2-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX2-NEXT: vbroadcastss (%eax), %ymm1 +; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: movl %ebp, %esp +; X32-AVX2-NEXT: popl %ebp +; X32-AVX2-NEXT: vzeroupper +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_8d: +; X64-AVX2: ## BB#0: ## %eintry +; X64-AVX2-NEXT: pushq %rbp +; X64-AVX2-NEXT: Lcfi6: +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: Lcfi7: +; X64-AVX2-NEXT: .cfi_offset %rbp, -16 +; X64-AVX2-NEXT: movq %rsp, %rbp +; X64-AVX2-NEXT: Lcfi8: +; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX2-NEXT: andq $-32, %rsp +; X64-AVX2-NEXT: subq $128, %rsp +; X64-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX2-NEXT: movl (%rdi), %eax +; X64-AVX2-NEXT: vmovd %eax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 +; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movq %rbp, %rsp +; X64-AVX2-NEXT: popq %rbp +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_8d: +; X32-AVX512VL: ## BB#0: ## %eintry +; X32-AVX512VL-NEXT: pushl %ebp +; X32-AVX512VL-NEXT: Lcfi9: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX512VL-NEXT: Lcfi10: +; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8 +; X32-AVX512VL-NEXT: movl %esp, %ebp +; X32-AVX512VL-NEXT: Lcfi11: +; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX512VL-NEXT: andl $-32, %esp +; X32-AVX512VL-NEXT: subl $128, %esp +; X32-AVX512VL-NEXT: movl 8(%ebp), %eax +; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX512VL-NEXT: vbroadcastss (%eax), %ymm1 +; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: movl %ebp, %esp +; X32-AVX512VL-NEXT: popl %ebp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_8d: +; X64-AVX512VL: ## BB#0: ## %eintry +; X64-AVX512VL-NEXT: pushq %rbp +; X64-AVX512VL-NEXT: Lcfi6: +; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX512VL-NEXT: Lcfi7: +; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16 +; X64-AVX512VL-NEXT: movq %rsp, %rbp +; X64-AVX512VL-NEXT: Lcfi8: +; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX512VL-NEXT: andq $-32, %rsp +; X64-AVX512VL-NEXT: subq $128, %rsp +; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX512VL-NEXT: movl (%rdi), %eax +; X64-AVX512VL-NEXT: vpbroadcastd %eax, %ymm1 +; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movq %rbp, %rsp +; X64-AVX512VL-NEXT: popq %rbp +; X64-AVX512VL-NEXT: retq eintry: %__a.addr.i = alloca <4 x i64>, align 16 %__b.addr.i = alloca <4 x i64>, align 16 @@ -1111,10 +1604,65 @@ eintry: ret void } -; X64-LABEL: isel_crash_2q -; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}} -; X64: ret define void @isel_crash_2q(i64* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_2q: +; X32-AVX2: ## BB#0: ## %entry +; X32-AVX2-NEXT: subl $60, %esp +; X32-AVX2-NEXT: Lcfi12: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 64 +; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX2-NEXT: vmovaps %xmm0, (%esp) +; X32-AVX2-NEXT: movl (%eax), %ecx +; X32-AVX2-NEXT: movl 4(%eax), %eax +; X32-AVX2-NEXT: vmovd %ecx, %xmm1 +; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 +; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X32-AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: addl $60, %esp +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_2q: +; X64-AVX2: ## BB#0: ## %entry +; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movq (%rdi), %rax +; X64-AVX2-NEXT: vmovq %rax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastq %xmm1, %xmm1 +; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_2q: +; X32-AVX512VL: ## BB#0: ## %entry +; X32-AVX512VL-NEXT: subl $60, %esp +; X32-AVX512VL-NEXT: Lcfi12: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 64 +; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX512VL-NEXT: vmovaps %xmm0, (%esp) +; X32-AVX512VL-NEXT: movl (%eax), %ecx +; X32-AVX512VL-NEXT: movl 4(%eax), %eax +; X32-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 +; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X32-AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: addl $60, %esp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_2q: +; X64-AVX512VL: ## BB#0: ## %entry +; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movq (%rdi), %rax +; X64-AVX512VL-NEXT: vpbroadcastq %rax, %xmm1 +; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: retq entry: %__a.addr.i = alloca <2 x i64>, align 16 %__b.addr.i = alloca <2 x i64>, align 16 @@ -1129,10 +1677,109 @@ entry: ret void } -; X64-LABEL: isel_crash_4q -; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}} -; X64: ret define void @isel_crash_4q(i64* %cV_R.addr) { +; X32-AVX2-LABEL: isel_crash_4q: +; X32-AVX2: ## BB#0: ## %eintry +; X32-AVX2-NEXT: pushl %ebp +; X32-AVX2-NEXT: Lcfi13: +; X32-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX2-NEXT: Lcfi14: +; X32-AVX2-NEXT: .cfi_offset %ebp, -8 +; X32-AVX2-NEXT: movl %esp, %ebp +; X32-AVX2-NEXT: Lcfi15: +; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX2-NEXT: andl $-32, %esp +; X32-AVX2-NEXT: subl $128, %esp +; X32-AVX2-NEXT: movl 8(%ebp), %eax +; X32-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX2-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX2-NEXT: movl (%eax), %ecx +; X32-AVX2-NEXT: movl 4(%eax), %eax +; X32-AVX2-NEXT: vmovd %ecx, %xmm1 +; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 +; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X32-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1 +; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) +; X32-AVX2-NEXT: movl %ebp, %esp +; X32-AVX2-NEXT: popl %ebp +; X32-AVX2-NEXT: vzeroupper +; X32-AVX2-NEXT: retl +; +; X64-AVX2-LABEL: isel_crash_4q: +; X64-AVX2: ## BB#0: ## %eintry +; X64-AVX2-NEXT: pushq %rbp +; X64-AVX2-NEXT: Lcfi9: +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: Lcfi10: +; X64-AVX2-NEXT: .cfi_offset %rbp, -16 +; X64-AVX2-NEXT: movq %rsp, %rbp +; X64-AVX2-NEXT: Lcfi11: +; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX2-NEXT: andq $-32, %rsp +; X64-AVX2-NEXT: subq $128, %rsp +; X64-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX2-NEXT: movq (%rdi), %rax +; X64-AVX2-NEXT: vmovq %rax, %xmm1 +; X64-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1 +; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX2-NEXT: movq %rbp, %rsp +; X64-AVX2-NEXT: popq %rbp +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X32-AVX512VL-LABEL: isel_crash_4q: +; X32-AVX512VL: ## BB#0: ## %eintry +; X32-AVX512VL-NEXT: pushl %ebp +; X32-AVX512VL-NEXT: Lcfi13: +; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8 +; X32-AVX512VL-NEXT: Lcfi14: +; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8 +; X32-AVX512VL-NEXT: movl %esp, %ebp +; X32-AVX512VL-NEXT: Lcfi15: +; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp +; X32-AVX512VL-NEXT: andl $-32, %esp +; X32-AVX512VL-NEXT: subl $128, %esp +; X32-AVX512VL-NEXT: movl 8(%ebp), %eax +; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp) +; X32-AVX512VL-NEXT: movl (%eax), %ecx +; X32-AVX512VL-NEXT: movl 4(%eax), %eax +; X32-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 +; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1 +; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: movl %ebp, %esp +; X32-AVX512VL-NEXT: popl %ebp +; X32-AVX512VL-NEXT: retl +; +; X64-AVX512VL-LABEL: isel_crash_4q: +; X64-AVX512VL: ## BB#0: ## %eintry +; X64-AVX512VL-NEXT: pushq %rbp +; X64-AVX512VL-NEXT: Lcfi9: +; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX512VL-NEXT: Lcfi10: +; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16 +; X64-AVX512VL-NEXT: movq %rsp, %rbp +; X64-AVX512VL-NEXT: Lcfi11: +; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp +; X64-AVX512VL-NEXT: andq $-32, %rsp +; X64-AVX512VL-NEXT: subq $128, %rsp +; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp) +; X64-AVX512VL-NEXT: movq (%rdi), %rax +; X64-AVX512VL-NEXT: vpbroadcastq %rax, %ymm1 +; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: movq %rbp, %rsp +; X64-AVX512VL-NEXT: popq %rbp +; X64-AVX512VL-NEXT: retq eintry: %__a.addr.i = alloca <4 x i64>, align 16 %__b.addr.i = alloca <4 x i64>, align 16 |