summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--lib/Target/X86/X86InstrAVX512.td645
1 files changed, 620 insertions, 25 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 01a70323224c..cc5c09cbf0e5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
+class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
+ ValueType _vt> {
+ RegisterClass KRC = _krc;
+ RegisterClass KRCWM = _krcwm;
+ ValueType KVT = _vt;
+}
+
+def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
+def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
+def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
+def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
+def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
+def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPGTDZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPEQDZrr
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-}
+multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds>
+ : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2),
+ NewInf.KRC)>;
+}
+}
+
+// VPCMPEQB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPEQW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQWZ", [HasBWI]>;
+
+// VPCMPEQD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQDZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQDZ", [HasAVX512]>;
+
+// VPCMPEQQ - i64
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
+ "VPCMPEQQZ", [HasAVX512]>;
+
+// VPCMPGTB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPGTW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTWZ", [HasBWI]>;
+
+// VPCMPGTD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTDZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTDZ", [HasAVX512]>;
+
+// VPCMPGTQ - i64
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
+ "VPCMPGTQZ", [HasAVX512]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
@@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
+ _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))),
+ imm:$cc)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
+ _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ SDNode OpNode, string InstrStr,
+ list<Predicate> Preds>
+ : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
+ _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+// VPCMPB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
+ "VPCMPBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
+ "VPCMPBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
+ "VPCMPBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
+ "VPCMPWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
+ "VPCMPWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
+ "VPCMPWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
+ "VPCMPWZ", [HasBWI]>;
+
+// VPCMPD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
+ "VPCMPDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
+ "VPCMPDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
+ "VPCMPDZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
+ "VPCMPDZ", [HasAVX512]>;
+
+// VPCMPQ - i64
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
+ "VPCMPQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
+ "VPCMPQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
+ "VPCMPQZ", [HasAVX512]>;
+
+// VPCMPUB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
+ "VPCMPUBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
+ "VPCMPUBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
+ "VPCMPUBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPUW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
+ "VPCMPUWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
+ "VPCMPUWZ", [HasBWI]>;
+
+// VPCMPUD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
+ "VPCMPUDZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
+ "VPCMPUDZ", [HasAVX512]>;
+
+// VPCMPUQ - i64
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ256", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
+ "VPCMPUQZ", [HasAVX512]>;
+
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VCMPPSZrri
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VPCMPDZrri
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
- (COPY_TO_REGCLASS (VPCMPUDZrri
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
- imm:$cc), VK8)>;
+multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ string InstrStr, list<Predicate> Preds> {
+let Predicates = Preds in {
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
+ addr:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+}
+
+multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+ string InstrStr, list<Predicate> Preds>
+ : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
+
+let Predicates = Preds in
+ def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+ (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))),
+ (i64 0)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
+ _.RC:$src2,
+ imm:$cc),
+ NewInf.KRC)>;
+}
+
+
+// VCMPPS - f32
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
+ [HasAVX512]>;
+
+// VCMPPD - f64
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
+ [HasAVX512, HasVLX]>;
+
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
+ [HasAVX512]>;
+defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
+ [HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
@@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
+multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
+def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ (i8 8)), (i8 8))>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
+ (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
+ (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ (i8 8)), (i8 8))>;
+}
+
+multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+ AVX512VLVectorVTInfo _> {
+def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc),
+ (i8 8)), (i8 8))>;
+
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
+ (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
+ (i64 0)),
+ (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
+ (COPY_TO_REGCLASS VK8:$mask, VK16),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ imm:$cc),
+ (i8 8)), (i8 8))>;
+}
+
+let Predicates = [HasAVX512, NoVLX] in {
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
+
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
+}
+
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in