diff options
Diffstat (limited to 'test/CodeGen/X86/avx512vl-vec-cmp.ll')
-rw-r--r-- | test/CodeGen/X86/avx512vl-vec-cmp.ll | 144 |
1 files changed, 48 insertions, 96 deletions
diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll index 25b9cc79096fe..e0acf2be653e2 100644 --- a/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -5,8 +5,7 @@ define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind { ; CHECK-LABEL: test256_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y @@ -17,8 +16,7 @@ define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind ; CHECK-LABEL: test256_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -29,8 +27,7 @@ define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind ; CHECK-LABEL: test256_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1 -; CHECK-NEXT: vmovdqa32 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y @@ -41,8 +38,7 @@ define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind ; CHECK-LABEL: test256_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -53,8 +49,7 @@ define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwin ; CHECK-LABEL: test256_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %x, %y @@ -66,8 +61,7 @@ define <8 x i32> @test256_5b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_5b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %y, %x @@ -79,8 +73,7 @@ define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sgt <8 x i32> %x, %y @@ -92,8 +85,7 @@ define <8 x i32> @test256_6b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou ; CHECK-LABEL: test256_6b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp slt <8 x i32> %y, %x @@ -105,8 +97,7 @@ define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sle <8 x i32> %x, %y @@ -118,8 +109,7 @@ define <8 x i32> @test256_7b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou ; CHECK-LABEL: test256_7b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sge <8 x i32> %y, %x @@ -131,8 +121,7 @@ define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp ule <8 x i32> %x, %y @@ -144,8 +133,7 @@ define <8 x i32> @test256_8b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou ; CHECK-LABEL: test256_8b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp uge <8 x i32> %y, %x @@ -158,8 +146,7 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <8 x i32> %x1, %y1 %mask0 = icmp eq <8 x i32> %x, %y @@ -173,8 +160,7 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} -; CHECK-NEXT: vmovdqa %ymm2, %ymm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %mask0 = icmp sle <4 x i64> %x, %y @@ -188,8 +174,7 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <4 x i64> %x1, %y1 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4 @@ -204,8 +189,7 @@ define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 @@ -219,8 +203,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind ; CHECK-LABEL: test256_13: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 @@ -234,8 +217,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind ; CHECK-LABEL: test256_14: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 @@ -250,8 +232,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -268,8 +249,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -285,8 +265,7 @@ define <8 x i32> @test256_17(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_17: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp ne <8 x i32> %x, %y @@ -298,8 +277,7 @@ define <8 x i32> @test256_18(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp ne <8 x i32> %y, %x @@ -311,8 +289,7 @@ define <8 x i32> @test256_19(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp uge <8 x i32> %x, %y @@ -324,8 +301,7 @@ define <8 x i32> @test256_20(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_20: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp uge <8 x i32> %y, %x @@ -337,8 +313,7 @@ define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-LABEL: test128_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y @@ -349,8 +324,7 @@ define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind ; CHECK-LABEL: test128_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sgt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -361,8 +335,7 @@ define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind ; CHECK-LABEL: test128_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y @@ -373,8 +346,7 @@ define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind ; CHECK-LABEL: test128_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -385,8 +357,7 @@ define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwin ; CHECK-LABEL: test128_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %x, %y @@ -398,8 +369,7 @@ define <4 x i32> @test128_5b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwi ; CHECK-LABEL: test128_5b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %y, %x @@ -411,8 +381,7 @@ define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sgt <4 x i32> %x, %y @@ -424,8 +393,7 @@ define <4 x i32> @test128_6b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_6b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp slt <4 x i32> %y, %x @@ -437,8 +405,7 @@ define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sle <4 x i32> %x, %y @@ -450,8 +417,7 @@ define <4 x i32> @test128_7b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_7b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sge <4 x i32> %y, %x @@ -463,8 +429,7 @@ define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ule <4 x i32> %x, %y @@ -476,8 +441,7 @@ define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_8b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x @@ -490,8 +454,7 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <4 x i32> %x1, %y1 %mask0 = icmp eq <4 x i32> %x, %y @@ -505,8 +468,7 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vmovdqa %xmm2, %xmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm2, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %mask0 = icmp sle <2 x i64> %x, %y @@ -520,8 +482,7 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <2 x i64> %x1, %y1 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4 @@ -536,8 +497,7 @@ define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 @@ -551,8 +511,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind ; CHECK-LABEL: test128_13: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 @@ -566,8 +525,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind ; CHECK-LABEL: test128_14: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 @@ -582,8 +540,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -600,8 +557,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -617,8 +573,7 @@ define <4 x i32> @test128_17(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_17: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %x, %y @@ -630,8 +585,7 @@ define <4 x i32> @test128_18(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %y, %x @@ -643,8 +597,7 @@ define <4 x i32> @test128_19(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %x, %y @@ -656,8 +609,7 @@ define <4 x i32> @test128_20(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_20: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vmovdqa %xmm1, %xmm0 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x |