diff options
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 645 |
1 files changed, 620 insertions, 25 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 01a70323224c..cc5c09cbf0e5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                              v2f64x_info>;
+class X86KVectorVTInfo<RegisterClass _maskRC, RegisterClass _maskRCWM,
+                       ValueType _maskVT> {
+  ValueType KVT = _maskVT;          // i1 vector type described by this record
+  RegisterClass KRC = _maskRC;      // register class paired with KVT (VK2, VK4, ...)
+  RegisterClass KRCWM = _maskRCWM;  // the matching *WM register class (VK2WM, ...)
+}
+// One descriptor per i1 vector width, from 2 to 64 elements.
+def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
+def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
+def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
+def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
+def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
+def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+
 // This multiclass generates the masking variants from the non-masking
 // variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                  avx512vl_i64_info, HasAVX512>,
                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-            (COPY_TO_REGCLASS (VPCMPGTDZrr
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-            (COPY_TO_REGCLASS (VPCMPEQDZrr
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-}
+multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+    SDNode OpNode, string InstrStr,  // InstrStr + rr/rm/rrk/rmk names the instruction
+    list<Predicate> Preds> {         // Preds gates every pattern below
+let Predicates = Preds in {  // compare inserted at index 0 of a zeroed NewInf.KVT
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+                              (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
+                              NewInf.KRC)>;  // assumes the compare zeroes the upper mask bits - TODO confirm
+  // Same fold with the second operand loaded from memory (rm form).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (OpNode (_.VT _.RC:$src1),
+                                             (_.VT (bitconvert (_.LdFrag addr:$src2))))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
+                              NewInf.KRC)>;
+  // Compare result ANDed with $mask selects the masked register form (rrk).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (and _.KRCWM:$mask,
+                                          (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
+                                                  _.RC:$src1, _.RC:$src2),
+                              NewInf.KRC)>;
+  // Masked compare with a memory second operand (rmk).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (and (_.KVT _.KRCWM:$mask),
+                                          (_.KVT (OpNode (_.VT _.RC:$src1),
+                                                         (_.VT (bitconvert
+                                                                (_.LdFrag addr:$src2))))))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
+                                                  _.RC:$src1, addr:$src2),
+                              NewInf.KRC)>;
+}
+}
+
+multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+    SDNode OpNode, string InstrStr,  // InstrStr + rmb/rmbk names the broadcast forms
+    list<Predicate> Preds>           // Preds gates the two patterns added here
+   : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {  // adds the broadcast-from-memory operand forms
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (OpNode (_.VT _.RC:$src1),
+                                             (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
+                              (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
+                              NewInf.KRC)>;
+  // Masked broadcast form (rmbk): compare result ANDed with $mask.
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (and (_.KVT _.KRCWM:$mask),
+                                          (_.KVT (OpNode (_.VT _.RC:$src1),
+                                                         (X86VBroadcast
+                                                            (_.ScalarLdFrag addr:$src2)))))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
+                                                  _.RC:$src1, addr:$src2),
+                              NewInf.KRC)>;
+}
+}
+
+// VPCMPEQB - i8
+defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
+                                   "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
+                                   "VPCMPEQBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
+                                   "VPCMPEQBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPEQW - i16
+defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
+                                   "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
+                                   "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
+                                   "VPCMPEQWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
+                                   "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
+                                   "VPCMPEQWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
+                                   "VPCMPEQWZ", [HasBWI]>;
+
+// VPCMPEQD - i32
+defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
+                                       "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
+defm : 
avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ", [HasAVX512]>; + +// VPCMPEQQ - i64 +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; + +defm : 
avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; + +// VPCMPGTB - i8 +defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTBZ256", [HasBWI, HasVLX]>; + +// VPCMPGTW - i16 +defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ", [HasBWI]>; + +// VPCMPGTD - i32 +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, 
v32i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ", [HasAVX512]>; + +// VPCMPGTQ - i64 +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { @@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, defm VPCMPUQ : 
avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+    SDNode OpNode, string InstrStr,  // InstrStr + rri/rmi/rrik/rmik names the instruction
+    list<Predicate> Preds> {         // Preds gates every pattern below
+let Predicates = Preds in {  // cc-compare inserted at index 0 of a zeroed NewInf.KVT
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (OpNode (_.VT _.RC:$src1),
+                                             (_.VT _.RC:$src2),
+                                             imm:$cc)),
+                              (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+                                                  _.RC:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;  // assumes the compare zeroes the upper mask bits - TODO confirm
+  // Same fold with the second operand loaded from memory (rmi form).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (OpNode (_.VT _.RC:$src1),
+                                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
+                                             imm:$cc)),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+                                                  addr:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+  // Compare result ANDed with $mask selects the masked register form (rrik).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (and _.KRCWM:$mask,
+                                          (OpNode (_.VT _.RC:$src1),
+                                                  (_.VT _.RC:$src2),
+                                                  imm:$cc))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
+                                                  _.RC:$src1,
+                                                  _.RC:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+  // Masked compare with a memory second operand (rmik).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (and (_.KVT _.KRCWM:$mask),
+                                          (_.KVT (OpNode (_.VT _.RC:$src1),
+                                                         (_.VT (bitconvert
+                                                                (_.LdFrag addr:$src2))),
+                                                         imm:$cc)))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
+                                                  _.RC:$src1,
+                                                  addr:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+}
+}
+
+multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+    SDNode OpNode, string InstrStr,  // InstrStr + rmib/rmibk names the broadcast forms
+    list<Predicate> Preds>           // Preds gates the two patterns added here
+   : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
+let Predicates = Preds in {  // adds the broadcast-from-memory operand forms
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (OpNode (_.VT _.RC:$src1),
+                                             (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+                                             imm:$cc)),
+                              (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
+                                                  addr:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+  // Masked broadcast form (rmibk): compare result ANDed with $mask.
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (and (_.KVT _.KRCWM:$mask),
+                                          (_.KVT (OpNode (_.VT _.RC:$src1),
+                                                         (X86VBroadcast
+                                                            (_.ScalarLdFrag addr:$src2)),
+                                                         imm:$cc)))),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
+                                                  _.RC:$src1,
+                                                  addr:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+}
+}
+
+// VPCMPB - i8
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
+                                      "VPCMPBZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
+                                      "VPCMPBZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
+                                      "VPCMPBZ256", [HasBWI, HasVLX]>;
+
+// VPCMPW - i16
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
+                                      "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
+                                      "VPCMPWZ128", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
+                                      "VPCMPWZ128", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
+                                      "VPCMPWZ256", [HasBWI, HasVLX]>;
+defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
+                                      "VPCMPWZ256", [HasBWI, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
+                                      "VPCMPWZ", [HasBWI]>;
+
+// VPCMPD - i32
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
+                                          "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
+                                          "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
+                                          "VPCMPDZ128", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
+                                          "VPCMPDZ128", [HasAVX512, HasVLX]>;
+
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
+                                          "VPCMPDZ256", [HasAVX512, HasVLX]>;
+defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
+ 
"VPCMPDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm, + "VPCMPDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm, + "VPCMPDZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm, + "VPCMPDZ", [HasAVX512]>; + +// VPCMPQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; + +// VPCMPUB - i8 +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu, + "VPCMPUBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu, + "VPCMPUBZ128", [HasBWI, HasVLX]>; + +defm : 
avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu, + "VPCMPUBZ256", [HasBWI, HasVLX]>; + +// VPCMPUW - i16 +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu, + "VPCMPUWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu, + "VPCMPUWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu, + "VPCMPUWZ", [HasBWI]>; + +// VPCMPUD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu, + "VPCMPUDZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu, + "VPCMPUDZ", [HasAVX512]>; + +// VPCMPUQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : 
avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; + multiclass avx512_vcmp_common<X86VectorVTInfo _> { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>, defm VCMPPS : avx512_vcmp<avx512vl_f32_info>, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VCMPPSZrri - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 
VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VPCMPUDZrri
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
+multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+    string InstrStr, list<Predicate> Preds> {  // InstrStr + rri/rmi/rmbi names the instruction
+let Predicates = Preds in {  // X86cmpm result inserted at index 0 of a zeroed NewInf.KVT
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (X86cmpm (_.VT _.RC:$src1),
+                                              (_.VT _.RC:$src2),
+                                              imm:$cc)),
+                              (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+                                                  _.RC:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;  // assumes the compare zeroes the upper mask bits - TODO confirm
+  // Same fold with the second operand loaded from memory (rmi form).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (X86cmpm (_.VT _.RC:$src1),
+                                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
+                                              imm:$cc)),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
+                                                  addr:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+  // Second operand broadcast from a scalar load (rmbi form).
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (X86cmpm (_.VT _.RC:$src1),
+                                              (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+                                              imm:$cc)),
+                              (iPTR 0)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
+                                                  addr:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+}
+}
+
+multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
+    string InstrStr, list<Predicate> Preds>  // InstrStr + rrib names the added form
+   : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
+  // Adds the X86cmpmRnd form carrying FROUND_NO_EXC, selected as the rrib instruction.
+let Predicates = Preds in
+  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
+                              (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
+                                                 (_.VT _.RC:$src2),
+                                                 imm:$cc,
+                                                 (i32 FROUND_NO_EXC))),
+                              (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
+                                                  _.RC:$src2,
+                                                  imm:$cc),
+                              NewInf.KRC)>;
+}
+
+
+// VCMPPS - f32
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
+                                      [HasAVX512, HasVLX]>;
+defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
+                                      [HasAVX512, 
HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ", + [HasAVX512]>; + +// VCMPPD - f64 +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ", + [HasAVX512]>; // 
----------------------------------------------------------------
 // FPClass
@@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
+multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
+def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),  // 256-bit compare run on the 512-bit Zrr form
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
+            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
+// Result widened into a zeroed v16i1: shift left then right by 8 to clear the upper 8 bits.
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+                            (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+                            (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
+            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+            (i8 8)), (i8 8))>;
+// Masked variant: the VK8 mask is re-classed to VK16 and fed to the Zrrk form.
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+                            (v8i1 (and VK8:$mask,
+                                       (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
+                            (iPTR 0)),
+          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
+            (COPY_TO_REGCLASS VK8:$mask, VK16),
+            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+            (i8 8)), (i8 8))>;
+}
+// NOTE(review): "axv512" on these two multiclasses looks like a typo for "avx512"; names kept as-is.
+multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+                                                 AVX512VLVectorVTInfo _> {
+def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
+            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+            imm:$cc), VK8)>;
+// Result widened into a zeroed v16i1: shift left then right by 8 to clear the upper 8 bits.
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+                            (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
+                            (iPTR 0)),  // iPTR, not i64: do not tie the match to a 64-bit index
+          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
+            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+            imm:$cc),
+            (i8 8)), (i8 8))>;
+// Masked variant: the VK8 mask is re-classed to VK16 and fed to the Zrrik form.
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+                            (v8i1 (and VK8:$mask,
+                                       (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
+                            (iPTR 0)),
+          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
+            (COPY_TO_REGCLASS VK8:$mask, VK16),
+            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+            imm:$cc),
+            (i8 8)), (i8 8))>;
+}
+
+let Predicates = [HasAVX512, NoVLX] in {
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
+
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
+}
+
 // Mask setting all 0s or 1s
 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
 let Predicates = [HasAVX512] in |