Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r--  test/CodeGen/X86/adx-intrinsics.ll          |  27
-rw-r--r--  test/CodeGen/X86/avx512bw-intrinsics.ll     |  26
-rw-r--r--  test/CodeGen/X86/avx512bwvl-intrinsics.ll   |  24
-rw-r--r--  test/CodeGen/X86/pr33349.ll                 |  92
-rw-r--r--  test/CodeGen/X86/pr34088.ll                 |  46
-rw-r--r--  test/CodeGen/X86/select-mmx.ll              | 120
-rw-r--r--  test/CodeGen/X86/vector-shuffle-128-v16.ll  |   4
-rw-r--r--  test/CodeGen/X86/vector-shuffle-128-v8.ll   |   6
-rw-r--r--  test/CodeGen/X86/vector-shuffle-256-v16.ll  |   2
-rw-r--r--  test/CodeGen/X86/vector-shuffle-256-v32.ll  |   2
-rw-r--r--  test/CodeGen/X86/vector-shuffle-512-v32.ll  |   8
-rw-r--r--  test/CodeGen/X86/vector-shuffle-512-v64.ll  |   4
12 files changed, 323 insertions, 38 deletions
diff --git a/test/CodeGen/X86/adx-intrinsics.ll b/test/CodeGen/X86/adx-intrinsics.ll
index 0498177a9c124..819a5df14e63f 100644
--- a/test/CodeGen/X86/adx-intrinsics.ll
+++ b/test/CodeGen/X86/adx-intrinsics.ll
@@ -75,3 +75,30 @@ define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
   ret i8 %ret;
 }
 
+; Try a version with loads. Previously we crashed on this.
+define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) {
+; CHECK-LABEL: load_crash
+; CHECK: addb
+; ADX: adcxq
+; CHECK: setb
+; CHECK: retq
+  %1 = load i64, i64* %a, align 8
+  %2 = load i64, i64* %b, align 8
+  %3 = bitcast i64* %res to i8*
+  %4 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2, i8* %3)
+  %conv = zext i8 %4 to i32
+  ret i32 %conv
+}
+
+; Try a really simple all zero input case, which also used to crash
+define void @allzeros() {
+; CHECK-LABEL: allzeros
+; CHECK: xorl
+; CHECK: addb
+; CHECK: sbbq
+; CHECK: andl
+; CHECK: retq
+entry:
+  %0 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0, i8* null)
+  ret void
+}
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 5472f057ef27f..4abe3df9fc2a4 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1921,9 +1921,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8>
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
 ; AVX512BW: ## BB#0:
 ; AVX512BW-NEXT: kmovq %rsi, %k1
-; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1}
-; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z}
-; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2
+; AVX512BW-NEXT: vpbroadcastb %edi, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastb %edi, %zmm2
 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT: retq
@@ -1934,9 +1934,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8>
 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
-; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
-; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
-; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
+; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm2
 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512F-32-NEXT: retl
@@ -1954,20 +1954,20 @@ define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
 ; AVX512BW: ## BB#0:
 ; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1}
-; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z}
-; AVX512BW-NEXT: vpbroadcastw %di, %zmm2
+; AVX512BW-NEXT: vpbroadcastw %edi, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastw %edi, %zmm2
 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
-; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
-; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
+; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax
+; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm2
 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512F-32-NEXT: retl
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index c3ba6f106e6a7..9ceb3e5931a65 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2799,9 +2799,9 @@ define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8>
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
-; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
-; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
+; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
+; CHECK-NEXT: vpbroadcastb %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
 ; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
 ; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2819,9 +2819,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8>
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
-; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
-; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
+; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
+; CHECK-NEXT: vpbroadcastb %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
 ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
 ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2839,9 +2839,9 @@ define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
-; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
-; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
+; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
+; CHECK-NEXT: vpbroadcastw %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
 ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
 ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2859,9 +2859,9 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
-; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
-; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
+; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
+; CHECK-NEXT: vpbroadcastw %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
 ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
 ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
diff --git a/test/CodeGen/X86/pr33349.ll b/test/CodeGen/X86/pr33349.ll
new file mode 100644
index 0000000000000..db866db224814
--- /dev/null
+++ b/test/CodeGen/X86/pr33349.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
+; KNL-LABEL: test:
+; KNL: # BB#0: # %bb
+; KNL-NEXT: vpextrb $0, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fld1
+; KNL-NEXT: fldz
+; KNL-NEXT: fld %st(0)
+; KNL-NEXT: fcmovne %st(2), %st(0)
+; KNL-NEXT: vpextrb $4, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fld %st(1)
+; KNL-NEXT: fcmovne %st(3), %st(0)
+; KNL-NEXT: vpextrb $8, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fld %st(2)
+; KNL-NEXT: fcmovne %st(4), %st(0)
+; KNL-NEXT: vpextrb $12, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fxch %st(3)
+; KNL-NEXT: fcmovne %st(4), %st(0)
+; KNL-NEXT: fstp %st(4)
+; KNL-NEXT: fxch %st(3)
+; KNL-NEXT: fstpt 30(%rdi)
+; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: fstpt 20(%rdi)
+; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: fstpt 10(%rdi)
+; KNL-NEXT: fstpt (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test:
+; SKX: # BB#0: # %bb
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k2
+; SKX-NEXT: kshiftrw $15, %k2, %k2
+; SKX-NEXT: kshiftlw $15, %k2, %k2
+; SKX-NEXT: kshiftrw $15, %k2, %k2
+; SKX-NEXT: kmovd %k2, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fld1
+; SKX-NEXT: fldz
+; SKX-NEXT: fld %st(0)
+; SKX-NEXT: fcmovne %st(2), %st(0)
+; SKX-NEXT: kshiftlw $14, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kmovd %k1, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fld %st(1)
+; SKX-NEXT: fcmovne %st(3), %st(0)
+; SKX-NEXT: kshiftlw $15, %k0, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kmovd %k1, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fld %st(2)
+; SKX-NEXT: fcmovne %st(4), %st(0)
+; SKX-NEXT: kshiftlw $14, %k0, %k0
+; SKX-NEXT: kshiftrw $15, %k0, %k0
+; SKX-NEXT: kshiftlw $15, %k0, %k0
+; SKX-NEXT: kshiftrw $15, %k0, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fxch %st(3)
+; SKX-NEXT: fcmovne %st(4), %st(0)
+; SKX-NEXT: fstp %st(4)
+; SKX-NEXT: fxch %st(3)
+; SKX-NEXT: fstpt 10(%rdi)
+; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt (%rdi)
+; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt 30(%rdi)
+; SKX-NEXT: fstpt 20(%rdi)
+; SKX-NEXT: retq
+ bb:
+ %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
+ store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
+ ret void
+ }
+
diff --git a/test/CodeGen/X86/pr34088.ll b/test/CodeGen/X86/pr34088.ll
new file mode 100644
index 0000000000000..d3667e3884d41
--- /dev/null
+++ b/test/CodeGen/X86/pr34088.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=pentium4 | FileCheck %s
+
+%struct.Foo = type { i32, %struct.Bar }
+%struct.Bar = type { i32, %struct.Buffer, i32 }
+%struct.Buffer = type { i8*, i32 }
+
+; This test checks that the load of store %2 is not dropped.
+;
+define i32 @pr34088() local_unnamed_addr {
+; CHECK-LABEL: pr34088:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: .Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .Lcfi1:
+; CHECK-NEXT: .cfi_offset %ebp, -8
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: .Lcfi2:
+; CHECK-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-NEXT: andl $-16, %esp
+; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movaps %xmm0, (%esp)
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movaps %xmm1, (%esp)
+; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %ebp, %esp
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+entry:
+  %foo = alloca %struct.Foo, align 4
+  %0 = bitcast %struct.Foo* %foo to i8*
+  call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 0, i32 20, i32 4, i1 false)
+  %buffer1 = getelementptr inbounds %struct.Foo, %struct.Foo* %foo, i32 0, i32 1, i32 1
+  %1 = bitcast %struct.Buffer* %buffer1 to i64*
+  %2 = load i64, i64* %1, align 4
+  call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 -51, i32 20, i32 4, i1 false)
+  store i64 %2, i64* %1, align 4
+  ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
diff --git a/test/CodeGen/X86/select-mmx.ll b/test/CodeGen/X86/select-mmx.ll
new file mode 100644
index 0000000000000..9e6382faaa59a
--- /dev/null
+++ b/test/CodeGen/X86/select-mmx.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=I32
+
+
+; From source: clang -02
+;__m64 test47(int a)
+;{
+;    __m64 x = (a)? (__m64)(7): (__m64)(0);
+;    return __builtin_ia32_psllw(x, x);
+;}
+
+define i64 @test47(i64 %arg) {
+;
+; X64-LABEL: test47:
+; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testq %rdi, %rdi
+; X64-NEXT: movl $7, %ecx
+; X64-NEXT: cmoveq %rcx, %rax
+; X64-NEXT: movd %rax, %mm0
+; X64-NEXT: psllw %mm0, %mm0
+; X64-NEXT: movd %mm0, %rax
+; X64-NEXT: retq
+;
+; I32-LABEL: test47:
+; I32: # BB#0:
+; I32-NEXT: pushl %ebp
+; I32-NEXT: .Lcfi0:
+; I32-NEXT: .cfi_def_cfa_offset 8
+; I32-NEXT: .Lcfi1:
+; I32-NEXT: .cfi_offset %ebp, -8
+; I32-NEXT: movl %esp, %ebp
+; I32-NEXT: .Lcfi2:
+; I32-NEXT: .cfi_def_cfa_register %ebp
+; I32-NEXT: andl $-8, %esp
+; I32-NEXT: subl $16, %esp
+; I32-NEXT: movl 8(%ebp), %eax
+; I32-NEXT: orl 12(%ebp), %eax
+; I32-NEXT: movl $7, %eax
+; I32-NEXT: je .LBB0_2
+; I32-NEXT: # BB#1:
+; I32-NEXT: xorl %eax, %eax
+; I32-NEXT: .LBB0_2:
+; I32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; I32-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; I32-NEXT: psllw %mm0, %mm0
+; I32-NEXT: movq %mm0, (%esp)
+; I32-NEXT: movl (%esp), %eax
+; I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I32-NEXT: movl %ebp, %esp
+; I32-NEXT: popl %ebp
+; I32-NEXT: retl
+  %cond = icmp eq i64 %arg, 0
+  %slct = select i1 %cond, x86_mmx bitcast (i64 7 to x86_mmx), x86_mmx bitcast (i64 0 to x86_mmx)
+  %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct)
+  %retc = bitcast x86_mmx %psll to i64
+  ret i64 %retc
+}
+
+
+; From source: clang -O2
+;__m64 test49(int a, long long n, long long m)
+;{
+;    __m64 x = (a)? (__m64)(n): (__m64)(m);
+;    return __builtin_ia32_psllw(x, x);
+;}
+
+define i64 @test49(i64 %arg, i64 %x, i64 %y) {
+;
+; X64-LABEL: test49:
+; X64: # BB#0:
+; X64-NEXT: testq %rdi, %rdi
+; X64-NEXT: cmovneq %rdx, %rsi
+; X64-NEXT: movd %rsi, %mm0
+; X64-NEXT: psllw %mm0, %mm0
+; X64-NEXT: movd %mm0, %rax
+; X64-NEXT: retq
+;
+; I32-LABEL: test49:
+; I32: # BB#0:
+; I32-NEXT: pushl %ebp
+; I32-NEXT: .Lcfi3:
+; I32-NEXT: .cfi_def_cfa_offset 8
+; I32-NEXT: .Lcfi4:
+; I32-NEXT: .cfi_offset %ebp, -8
+; I32-NEXT: movl %esp, %ebp
+; I32-NEXT: .Lcfi5:
+; I32-NEXT: .cfi_def_cfa_register %ebp
+; I32-NEXT: andl $-8, %esp
+; I32-NEXT: subl $8, %esp
+; I32-NEXT: movl 8(%ebp), %eax
+; I32-NEXT: orl 12(%ebp), %eax
+; I32-NEXT: je .LBB1_1
+; I32-NEXT: # BB#2:
+; I32-NEXT: leal 24(%ebp), %eax
+; I32-NEXT: jmp .LBB1_3
+; I32-NEXT: .LBB1_1:
+; I32-NEXT: leal 16(%ebp), %eax
+; I32-NEXT: .LBB1_3:
+; I32-NEXT: movq (%eax), %mm0
+; I32-NEXT: psllw %mm0, %mm0
+; I32-NEXT: movq %mm0, (%esp)
+; I32-NEXT: movl (%esp), %eax
+; I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I32-NEXT: movl %ebp, %esp
+; I32-NEXT: popl %ebp
+; I32-NEXT: retl
+  %cond = icmp eq i64 %arg, 0
+  %xmmx = bitcast i64 %x to x86_mmx
+  %ymmx = bitcast i64 %y to x86_mmx
+  %slct = select i1 %cond, x86_mmx %xmmx, x86_mmx %ymmx
+  %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct)
+  %retc = bitcast x86_mmx %psll to i64
+  ret i64 %retc
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index abba0ff87aced..9f1ed021992df 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1643,7 +1643,7 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movsbl (%rdi), %eax
 ; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: vpbroadcastb %al, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
 ; AVX512VL-NEXT: retq
   %tmp = load i8, i8* %ptr, align 1
   %tmp1 = sext i8 %tmp to i32
@@ -1696,7 +1696,7 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movsbl (%rdi), %eax
 ; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: vpbroadcastb %al, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
 ; AVX512VL-NEXT: retq
   %tmp = load i8, i8* %ptr, align 1
   %tmp1 = sext i8 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index c03b9d1472c19..1cf8453fc6ad3 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2274,7 +2274,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
 ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movswl (%rdi), %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
 ; AVX512VL-NEXT: retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
@@ -2390,7 +2390,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movswl (%rdi), %eax
 ; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
 ; AVX512VL-NEXT: retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
@@ -2443,7 +2443,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movswl (%rdi), %eax
 ; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
 ; AVX512VL-NEXT: retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 6f5d916f2294b..ba7c0894b932d 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -4069,7 +4069,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
 ; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movswl (%rdi), %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %ymm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
 ; AVX512VL-NEXT: retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 05a797cb6f8e0..d51b69415b93a 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2431,7 +2431,7 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) {
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movsbl (%rdi), %eax
 ; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: vpbroadcastb %al, %ymm0
+; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0
 ; AVX512VL-NEXT: retq
   %tmp = load i8, i8* %ptr, align 1
   %tmp1 = sext i8 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll
index 7a5c992bb8290..b8fc27ba55156 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -228,7 +228,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
 ; SKX-LABEL: insert_dup_mem_v32i16_i32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movl (%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
 ; SKX-NEXT: retq
   %tmp = load i32, i32* %ptr, align 4
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -249,7 +249,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
 ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movswl (%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
 ; SKX-NEXT: retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
@@ -269,7 +269,7 @@ define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 {
 ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movzwl 2(%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
 ; SKX-NEXT: retq
   %tmp = load i32, i32* %ptr, align 4
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -288,7 +288,7 @@ define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 {
 ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: movzwl 2(%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
 ; SKX-NEXT: retq
   %tmp = load i32, i32* %ptr, align 4
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll
index f4650ec741a71..9dca3191e06b7 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -332,7 +332,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
 ; AVX512BW: # BB#0:
 ; AVX512BW-NEXT: movsbl (%rdi), %eax
 ; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: vpbroadcastb %al, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
@@ -348,7 +348,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
 ; AVX512VBMI: # BB#0:
 ; AVX512VBMI-NEXT: movsbl (%rdi), %eax
 ; AVX512VBMI-NEXT: shrl $8, %eax
-; AVX512VBMI-NEXT: vpbroadcastb %al, %zmm0
+; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0
 ; AVX512VBMI-NEXT: retq
   %tmp = load i8, i8* %ptr, align 1
   %tmp1 = sext i8 %tmp to i32