diff options
Diffstat (limited to 'test/CodeGen/X86/merge-consecutive-loads-512.ll')
-rw-r--r-- | test/CodeGen/X86/merge-consecutive-loads-512.ll | 90 |
1 files changed, 57 insertions, 33 deletions
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll index 8259a780c8fb6..c3500f0ad3991 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -136,21 +136,29 @@ define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noin } define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp { -; ALL-LABEL: merge_8f64_f64_1u3u5zu8: -; ALL: # BB#0: -; ALL-NEXT: vmovupd 8(%rdi), %zmm1 -; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; ALL-NEXT: vmovapd {{.*#+}} zmm0 = <0,u,2,u,4,13,u,7> -; ALL-NEXT: vpermi2pd %zmm2, %zmm1, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8: +; AVX512F: # BB#0: +; AVX512F-NEXT: movb $32, %al +; AVX512F-NEXT: kmovw %eax, %k0 +; AVX512F-NEXT: knotw %k0, %k1 +; AVX512F-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: movb $32, %al +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: knotw %k0, %k1 +; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm1 -; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; X32-AVX512F-NEXT: vmovapd {{.*#+}} zmm0 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0> -; X32-AVX512F-NEXT: vpermi2pd %zmm2, %zmm1, %zmm0 +; X32-AVX512F-NEXT: movb $32, %cl +; X32-AVX512F-NEXT: kmovw %ecx, %k0 +; X32-AVX512F-NEXT: knotw %k0, %k1 +; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0 {%k1} {z} ; X32-AVX512F-NEXT: retl %ptr0 = getelementptr inbounds double, double* %ptr, i64 1 %ptr2 = getelementptr inbounds double, double* %ptr, i64 3 @@ -223,21 +231,29 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s } define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp { -; ALL-LABEL: merge_8i64_i64_1u3u5zu8: -; ALL: # BB#0: -; ALL-NEXT: vmovdqu64 8(%rdi), %zmm1 -; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; ALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,u,2,u,4,13,u,7> -; ALL-NEXT: vpermi2q %zmm2, %zmm1, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8: +; AVX512F: # BB#0: +; AVX512F-NEXT: movb $32, %al +; AVX512F-NEXT: kmovw %eax, %k0 +; AVX512F-NEXT: knotw %k0, %k1 +; AVX512F-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: movb $32, %al +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: knotw %k0, %k1 +; AVX512BW-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm1 -; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; X32-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0> -; X32-AVX512F-NEXT: vpermi2q %zmm2, %zmm1, %zmm0 +; X32-AVX512F-NEXT: movb $32, %cl +; X32-AVX512F-NEXT: kmovw %ecx, %k0 +; X32-AVX512F-NEXT: knotw %k0, %k1 +; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 {%k1} {z} ; X32-AVX512F-NEXT: retl %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1 %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3 @@ -446,21 +462,29 @@ define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable } define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp { -; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: -; ALL: # BB#0: -; ALL-NEXT: vmovdqu32 (%rdi), %zmm1 -; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; ALL-NEXT: vmovdqa32 {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15> -; ALL-NEXT: vpermi2d %zmm2, %zmm1, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: +; AVX512F: # BB#0: +; AVX512F-NEXT: movw $8240, %ax # imm = 0x2030 +; AVX512F-NEXT: kmovw %eax, %k0 +; AVX512F-NEXT: knotw %k0, %k1 +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: movw $8240, %ax # imm = 0x2030 +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: knotw %k0, %k1 +; AVX512BW-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq ; ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm1 -; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; X32-AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15> -; X32-AVX512F-NEXT: vpermi2d %zmm2, %zmm1, %zmm0 +; X32-AVX512F-NEXT: movw $8240, %cx # imm = 0x2030 +; X32-AVX512F-NEXT: kmovw %ecx, %k0 +; X32-AVX512F-NEXT: knotw %k0, %k1 +; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0 {%k1} {z} ; X32-AVX512F-NEXT: retl %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0 %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3 |