summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r--test/CodeGen/X86/adx-intrinsics.ll27
-rw-r--r--test/CodeGen/X86/avx512bw-intrinsics.ll26
-rw-r--r--test/CodeGen/X86/avx512bwvl-intrinsics.ll24
-rw-r--r--test/CodeGen/X86/pr33349.ll92
-rw-r--r--test/CodeGen/X86/pr34088.ll46
-rw-r--r--test/CodeGen/X86/select-mmx.ll120
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v16.ll4
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll6
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v16.ll2
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v32.ll2
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v32.ll8
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v64.ll4
12 files changed, 323 insertions, 38 deletions
diff --git a/test/CodeGen/X86/adx-intrinsics.ll b/test/CodeGen/X86/adx-intrinsics.ll
index 0498177a9c124..819a5df14e63f 100644
--- a/test/CodeGen/X86/adx-intrinsics.ll
+++ b/test/CodeGen/X86/adx-intrinsics.ll
@@ -75,3 +75,30 @@ define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
ret i8 %ret;
}
+; Try a version with loads. Previously we crashed on this.
+define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) {
+; CHECK-LABEL: load_crash
+; CHECK: addb
+; ADX: adcxq
+; CHECK: setb
+; CHECK: retq
+ %1 = load i64, i64* %a, align 8
+ %2 = load i64, i64* %b, align 8
+ %3 = bitcast i64* %res to i8*
+ %4 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2, i8* %3)
+ %conv = zext i8 %4 to i32
+ ret i32 %conv
+}
+
+; Try a really simple all zero input case, which also used to crash
+define void @allzeros() {
+; CHECK-LABEL: allzeros
+; CHECK: xorl
+; CHECK: addb
+; CHECK: sbbq
+; CHECK: andl
+; CHECK: retq
+entry:
+ %0 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0, i8* null)
+ ret void
+}
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 5472f057ef27f..4abe3df9fc2a4 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1921,9 +1921,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8>
; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rsi, %k1
-; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1}
-; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z}
-; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2
+; AVX512BW-NEXT: vpbroadcastb %edi, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastb %edi, %zmm2
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
@@ -1934,9 +1934,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8>
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
-; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
-; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
-; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
+; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm2
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
@@ -1954,20 +1954,20 @@ define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i
; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
-; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1}
-; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z}
-; AVX512BW-NEXT: vpbroadcastw %di, %zmm2
+; AVX512BW-NEXT: vpbroadcastw %edi, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastw %edi, %zmm2
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
-; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
-; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
+; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax
+; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm2
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index c3ba6f106e6a7..9ceb3e5931a65 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2799,9 +2799,9 @@ define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
-; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
-; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
+; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
+; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
+; CHECK-NEXT: vpbroadcastb %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2819,9 +2819,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
-; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
-; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
+; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
+; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
+; CHECK-NEXT: vpbroadcastb %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2839,9 +2839,9 @@ define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
-; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
-; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
+; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
+; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
+; CHECK-NEXT: vpbroadcastw %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -2859,9 +2859,9 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
-; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
-; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
-; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
+; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
+; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
+; CHECK-NEXT: vpbroadcastw %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
diff --git a/test/CodeGen/X86/pr33349.ll b/test/CodeGen/X86/pr33349.ll
new file mode 100644
index 0000000000000..db866db224814
--- /dev/null
+++ b/test/CodeGen/X86/pr33349.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
+; KNL-LABEL: test:
+; KNL: # BB#0: # %bb
+; KNL-NEXT: vpextrb $0, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fld1
+; KNL-NEXT: fldz
+; KNL-NEXT: fld %st(0)
+; KNL-NEXT: fcmovne %st(2), %st(0)
+; KNL-NEXT: vpextrb $4, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fld %st(1)
+; KNL-NEXT: fcmovne %st(3), %st(0)
+; KNL-NEXT: vpextrb $8, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fld %st(2)
+; KNL-NEXT: fcmovne %st(4), %st(0)
+; KNL-NEXT: vpextrb $12, %xmm0, %eax
+; KNL-NEXT: testb $1, %al
+; KNL-NEXT: fxch %st(3)
+; KNL-NEXT: fcmovne %st(4), %st(0)
+; KNL-NEXT: fstp %st(4)
+; KNL-NEXT: fxch %st(3)
+; KNL-NEXT: fstpt 30(%rdi)
+; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: fstpt 20(%rdi)
+; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: fstpt 10(%rdi)
+; KNL-NEXT: fstpt (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test:
+; SKX: # BB#0: # %bb
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k2
+; SKX-NEXT: kshiftrw $15, %k2, %k2
+; SKX-NEXT: kshiftlw $15, %k2, %k2
+; SKX-NEXT: kshiftrw $15, %k2, %k2
+; SKX-NEXT: kmovd %k2, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fld1
+; SKX-NEXT: fldz
+; SKX-NEXT: fld %st(0)
+; SKX-NEXT: fcmovne %st(2), %st(0)
+; SKX-NEXT: kshiftlw $14, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kmovd %k1, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fld %st(1)
+; SKX-NEXT: fcmovne %st(3), %st(0)
+; SKX-NEXT: kshiftlw $15, %k0, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kmovd %k1, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fld %st(2)
+; SKX-NEXT: fcmovne %st(4), %st(0)
+; SKX-NEXT: kshiftlw $14, %k0, %k0
+; SKX-NEXT: kshiftrw $15, %k0, %k0
+; SKX-NEXT: kshiftlw $15, %k0, %k0
+; SKX-NEXT: kshiftrw $15, %k0, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: fxch %st(3)
+; SKX-NEXT: fcmovne %st(4), %st(0)
+; SKX-NEXT: fstp %st(4)
+; SKX-NEXT: fxch %st(3)
+; SKX-NEXT: fstpt 10(%rdi)
+; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt (%rdi)
+; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt 30(%rdi)
+; SKX-NEXT: fstpt 20(%rdi)
+; SKX-NEXT: retq
+ bb:
+ %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
+ store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
+ ret void
+ }
+
diff --git a/test/CodeGen/X86/pr34088.ll b/test/CodeGen/X86/pr34088.ll
new file mode 100644
index 0000000000000..d3667e3884d41
--- /dev/null
+++ b/test/CodeGen/X86/pr34088.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=pentium4 | FileCheck %s
+
+%struct.Foo = type { i32, %struct.Bar }
+%struct.Bar = type { i32, %struct.Buffer, i32 }
+%struct.Buffer = type { i8*, i32 }
+
+; This test checks that the load of %2, which is stored back after the second memset, is not dropped.
+;
+define i32 @pr34088() local_unnamed_addr {
+; CHECK-LABEL: pr34088:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: .Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .Lcfi1:
+; CHECK-NEXT: .cfi_offset %ebp, -8
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: .Lcfi2:
+; CHECK-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-NEXT: andl $-16, %esp
+; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movaps %xmm0, (%esp)
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movaps %xmm1, (%esp)
+; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %ebp, %esp
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+entry:
+ %foo = alloca %struct.Foo, align 4
+ %0 = bitcast %struct.Foo* %foo to i8*
+ call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 0, i32 20, i32 4, i1 false)
+ %buffer1 = getelementptr inbounds %struct.Foo, %struct.Foo* %foo, i32 0, i32 1, i32 1
+ %1 = bitcast %struct.Buffer* %buffer1 to i64*
+ %2 = load i64, i64* %1, align 4
+ call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 -51, i32 20, i32 4, i1 false)
+ store i64 %2, i64* %1, align 4
+ ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
diff --git a/test/CodeGen/X86/select-mmx.ll b/test/CodeGen/X86/select-mmx.ll
new file mode 100644
index 0000000000000..9e6382faaa59a
--- /dev/null
+++ b/test/CodeGen/X86/select-mmx.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=I32
+
+
+; From source: clang -O2
+;__m64 test47(int a)
+;{
+; __m64 x = (a)? (__m64)(7): (__m64)(0);
+; return __builtin_ia32_psllw(x, x);
+;}
+
+define i64 @test47(i64 %arg) {
+;
+; X64-LABEL: test47:
+; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testq %rdi, %rdi
+; X64-NEXT: movl $7, %ecx
+; X64-NEXT: cmoveq %rcx, %rax
+; X64-NEXT: movd %rax, %mm0
+; X64-NEXT: psllw %mm0, %mm0
+; X64-NEXT: movd %mm0, %rax
+; X64-NEXT: retq
+;
+; I32-LABEL: test47:
+; I32: # BB#0:
+; I32-NEXT: pushl %ebp
+; I32-NEXT: .Lcfi0:
+; I32-NEXT: .cfi_def_cfa_offset 8
+; I32-NEXT: .Lcfi1:
+; I32-NEXT: .cfi_offset %ebp, -8
+; I32-NEXT: movl %esp, %ebp
+; I32-NEXT: .Lcfi2:
+; I32-NEXT: .cfi_def_cfa_register %ebp
+; I32-NEXT: andl $-8, %esp
+; I32-NEXT: subl $16, %esp
+; I32-NEXT: movl 8(%ebp), %eax
+; I32-NEXT: orl 12(%ebp), %eax
+; I32-NEXT: movl $7, %eax
+; I32-NEXT: je .LBB0_2
+; I32-NEXT: # BB#1:
+; I32-NEXT: xorl %eax, %eax
+; I32-NEXT: .LBB0_2:
+; I32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; I32-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; I32-NEXT: psllw %mm0, %mm0
+; I32-NEXT: movq %mm0, (%esp)
+; I32-NEXT: movl (%esp), %eax
+; I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I32-NEXT: movl %ebp, %esp
+; I32-NEXT: popl %ebp
+; I32-NEXT: retl
+ %cond = icmp eq i64 %arg, 0
+ %slct = select i1 %cond, x86_mmx bitcast (i64 7 to x86_mmx), x86_mmx bitcast (i64 0 to x86_mmx)
+ %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct)
+ %retc = bitcast x86_mmx %psll to i64
+ ret i64 %retc
+}
+
+
+; From source: clang -O2
+;__m64 test49(int a, long long n, long long m)
+;{
+; __m64 x = (a)? (__m64)(n): (__m64)(m);
+; return __builtin_ia32_psllw(x, x);
+;}
+
+define i64 @test49(i64 %arg, i64 %x, i64 %y) {
+;
+; X64-LABEL: test49:
+; X64: # BB#0:
+; X64-NEXT: testq %rdi, %rdi
+; X64-NEXT: cmovneq %rdx, %rsi
+; X64-NEXT: movd %rsi, %mm0
+; X64-NEXT: psllw %mm0, %mm0
+; X64-NEXT: movd %mm0, %rax
+; X64-NEXT: retq
+;
+; I32-LABEL: test49:
+; I32: # BB#0:
+; I32-NEXT: pushl %ebp
+; I32-NEXT: .Lcfi3:
+; I32-NEXT: .cfi_def_cfa_offset 8
+; I32-NEXT: .Lcfi4:
+; I32-NEXT: .cfi_offset %ebp, -8
+; I32-NEXT: movl %esp, %ebp
+; I32-NEXT: .Lcfi5:
+; I32-NEXT: .cfi_def_cfa_register %ebp
+; I32-NEXT: andl $-8, %esp
+; I32-NEXT: subl $8, %esp
+; I32-NEXT: movl 8(%ebp), %eax
+; I32-NEXT: orl 12(%ebp), %eax
+; I32-NEXT: je .LBB1_1
+; I32-NEXT: # BB#2:
+; I32-NEXT: leal 24(%ebp), %eax
+; I32-NEXT: jmp .LBB1_3
+; I32-NEXT: .LBB1_1:
+; I32-NEXT: leal 16(%ebp), %eax
+; I32-NEXT: .LBB1_3:
+; I32-NEXT: movq (%eax), %mm0
+; I32-NEXT: psllw %mm0, %mm0
+; I32-NEXT: movq %mm0, (%esp)
+; I32-NEXT: movl (%esp), %eax
+; I32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; I32-NEXT: movl %ebp, %esp
+; I32-NEXT: popl %ebp
+; I32-NEXT: retl
+ %cond = icmp eq i64 %arg, 0
+ %xmmx = bitcast i64 %x to x86_mmx
+ %ymmx = bitcast i64 %y to x86_mmx
+ %slct = select i1 %cond, x86_mmx %xmmx, x86_mmx %ymmx
+ %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct)
+ %retc = bitcast x86_mmx %psll to i64
+ ret i64 %retc
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index abba0ff87aced..9f1ed021992df 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1643,7 +1643,7 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: vpbroadcastb %al, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32
@@ -1696,7 +1696,7 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: vpbroadcastb %al, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index c03b9d1472c19..1cf8453fc6ad3 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2274,7 +2274,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@@ -2390,7 +2390,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@@ -2443,7 +2443,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 6f5d916f2294b..ba7c0894b932d 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -4069,7 +4069,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
-; AVX512VL-NEXT: vpbroadcastw %ax, %ymm0
+; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 05a797cb6f8e0..d51b69415b93a 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2431,7 +2431,7 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: vpbroadcastb %al, %ymm0
+; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0
; AVX512VL-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32
diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll
index 7a5c992bb8290..b8fc27ba55156 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -228,7 +228,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
; SKX-LABEL: insert_dup_mem_v32i16_i32:
; SKX: ## BB#0:
; SKX-NEXT: movl (%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -249,7 +249,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
; SKX: ## BB#0:
; SKX-NEXT: movswl (%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@@ -269,7 +269,7 @@ define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 {
; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
; SKX: ## BB#0:
; SKX-NEXT: movzwl 2(%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -288,7 +288,7 @@ define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 {
; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
; SKX: ## BB#0:
; SKX-NEXT: movzwl 2(%rdi), %eax
-; SKX-NEXT: vpbroadcastw %ax, %zmm0
+; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll
index f4650ec741a71..9dca3191e06b7 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -332,7 +332,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: movsbl (%rdi), %eax
; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: vpbroadcastb %al, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
@@ -348,7 +348,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
; AVX512VBMI: # BB#0:
; AVX512VBMI-NEXT: movsbl (%rdi), %eax
; AVX512VBMI-NEXT: shrl $8, %eax
-; AVX512VBMI-NEXT: vpbroadcastb %al, %zmm0
+; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0
; AVX512VBMI-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32