diff options
Diffstat (limited to 'test/CodeGen/X86/avx512-gather-scatter-intrin.ll')
-rw-r--r-- | test/CodeGen/X86/avx512-gather-scatter-intrin.ll | 63 |
1 files changed, 59 insertions, 4 deletions
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll index 3bc67cceaab5d..9ba18192f5d29 100644 --- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -259,18 +259,22 @@ define void @prefetch(<8 x i64> %ind, i8* %base) { ; CHECK: ## BB#0: ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1} +; CHECK-NEXT: kxorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1} +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: kmovb %eax, %k1 ; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1} +; CHECK-NEXT: movb $120, %al +; CHECK-NEXT: kmovb %eax, %k1 ; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0) - call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1) + call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 1) + call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 0) + call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 1) ret void } - declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32) define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) { @@ -790,3 +794,54 @@ define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, < ret void } +define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) { +; CHECK-LABEL: scatter_mask_test: +; CHECK: ## BB#0: +; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1} +; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: kmovb %eax, %k1 +; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1} +; CHECK-NEXT: movb $96, %al +; CHECK-NEXT: kmovb %eax, %k1 +; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} +; CHECK-NEXT: retq + call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2) + call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4) + call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2) + call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4) + ret void +} + +define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, i8* %base) { +; CHECK-LABEL: gather_mask_test: +; CHECK: ## BB#0: +; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: vmovaps %zmm1, %zmm2 +; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1} +; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: vmovaps %zmm1, %zmm3 +; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1} +; CHECK-NEXT: movw $1, %ax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm1, %zmm4 +; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm4 {%k1} +; CHECK-NEXT: movw $220, %ax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} +; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0 +; CHECK-NEXT: vaddps %zmm4, %zmm1, %zmm1 +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 -1, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 0, i32 4) + %res2 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 1, i32 4) + %res3 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 220, i32 4) + + %res4 = fadd <16 x float> %res, %res1 + %res5 = fadd <16 x float> %res3, %res2 + %res6 = fadd <16 x float> %res5, %res4 + ret <16 x float> %res6 +} |